LLVM OpenMP 22.0.0git
callback.h
Go to the documentation of this file.
1#ifndef _BSD_SOURCE
2#define _BSD_SOURCE
3#endif
4#ifndef _DEFAULT_SOURCE
5#define _DEFAULT_SOURCE
6#endif
7#include <stdio.h>
8#ifndef __STDC_FORMAT_MACROS
9#define __STDC_FORMAT_MACROS
10#endif
11#include <inttypes.h>
12#include <omp.h>
13#include <omp-tools.h>
14#include "ompt-signal.h"
15#include <stdlib.h>
16#include <assert.h>
17
18// Used to detect architecture
19#include "../../src/kmp_platform.h"
20
// A tool that includes this header may define _TOOL_PREFIX to tag the event
// lines it prints. When nobody did, the prefix is empty and we assume that we
// run as part of an OMPT test, which is signalled through _OMPT_TESTS.
#if !defined(_TOOL_PREFIX)
#define _OMPT_TESTS
#define _TOOL_PREFIX ""
#endif
26
// Printable thread-type names, listed in index order. The lookup site is not
// part of this chunk; presumably the index is the numeric ompt_thread_t value.
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED", // 0
    "ompt_thread_initial",   // 1
    "ompt_thread_worker",    // 2
    "ompt_thread_other"      // 3
};
30
// Printable task-status names. The trailing comment on each entry is its
// array index; slot 0 is a placeholder for "no valid status".
static const char *ompt_task_status_t_values[] = {
    "ompt_task_UNDEFINED",     // 0
    "ompt_task_complete",      // 1
    "ompt_task_yield",         // 2
    "ompt_task_cancel",        // 3
    "ompt_task_detach",        // 4
    "ompt_task_early_fulfill", // 5
    "ompt_task_late_fulfill",  // 6
    "ompt_task_switch",        // 7
    "ompt_taskwait_complete"   // 8
};
// Printable cancel-flag names. on_ompt_callback_cancel() picks one of entries
// 0-3 for the first printed name and one of entries 4-6 for the second.
static const char *ompt_cancel_flag_t_values[] = {
    "ompt_cancel_parallel",      // 0
    "ompt_cancel_sections",      // 1
    "ompt_cancel_loop",          // 2
    "ompt_cancel_taskgroup",     // 3
    "ompt_cancel_activated",     // 4
    "ompt_cancel_detected",      // 5
    "ompt_cancel_discarded_task" // 6
};
47
// Printable worksharing-construct names, listed in index order. Slot 0 is the
// "undefined" placeholder.
static const char *ompt_work_t_values[] = {
    "undefined",                 // 0
    "ompt_work_loop",            // 1
    "ompt_work_sections",        // 2
    "ompt_work_single_executor", // 3
    "ompt_work_single_other",    // 4
    "ompt_work_workshare",       // 5
    "ompt_work_distribute",      // 6
    "ompt_work_taskloop",        // 7
    "ompt_work_scope",           // 8
    "ompt_work_workdistribute",  // 9
    "ompt_work_loop_static",     // 10
    "ompt_work_loop_dynamic",    // 11
    "ompt_work_loop_guided",     // 12
    "ompt_work_loop_other"       // 13
};
62
// Event-name stems for worksharing constructs, in the same order as
// ompt_work_t_values. The work callback's format strings append "_begin" or
// "_end" to a stem.
static const char *ompt_work_events_t_values[] = {
    "undefined",                  // 0
    "ompt_event_loop",            // 1
    "ompt_event_sections",        // 2
    "ompt_event_single_in_block", // 3
    "ompt_event_single_others",   // 4
    "ompt_event_workshare",       // 5
    "ompt_event_distribute",      // 6
    "ompt_event_taskloop",        // 7
    "ompt_event_scope",           // 8
    "ompt_event_workdistribute",  // 9
    "ompt_event_loop_static",     // 10
    "ompt_event_loop_dynamic",    // 11
    "ompt_event_loop_guided",     // 12
    "ompt_event_loop_other"       // 13
};
77
// Printable dependence-type names, 36 slots in total. Indices 1-7 and 34-35
// carry names; the slots in between hold empty strings so that every index
// below 36 refers to a valid (possibly empty) C string.
static const char *ompt_dependence_type_t_values[36] = {
    "ompt_dependence_type_UNDEFINED",     // 0
    "ompt_dependence_type_in",            // 1
    "ompt_dependence_type_out",           // 2
    "ompt_dependence_type_inout",         // 3
    "ompt_dependence_type_mutexinoutset", // 4
    "ompt_dependence_type_source",        // 5
    "ompt_dependence_type_sink",          // 6
    "ompt_dependence_type_inoutset",      // 7
    "", "", "", "", "", "",                 // 8-13
    "", "", "", "", "", "", "", "", "", "", // 14-23
    "", "", "", "", "", "", "", "", "", "", // 24-33
    "ompt_dependence_type_out_all_memory",  // 34
    "ompt_dependence_type_inout_all_memory" // 35
};
116
// Name fragments for synchronization-region kinds, listed in index order.
// Slot 0 is the "undefined" placeholder.
static const char *ompt_sync_region_t_values[] = {
    "undefined",                  // 0
    "barrier",                    // 1
    "barrier_implicit",           // 2
    "barrier_explicit",           // 3
    "barrier_implementation",     // 4
    "taskwait",                   // 5
    "taskgroup",                  // 6
    "reduction",                  // 7
    "barrier_implicit_workshare", // 8
    "barrier_implicit_parallel",  // 9
    "barrier_teams"               // 10
};
128
129static void format_task_type(int type, char *buffer) {
130 char *progress = buffer;
131 if (type & ompt_task_initial)
132 progress += sprintf(progress, "ompt_task_initial");
133 if (type & ompt_task_implicit)
134 progress += sprintf(progress, "ompt_task_implicit");
135 if (type & ompt_task_explicit)
136 progress += sprintf(progress, "ompt_task_explicit");
137 if (type & ompt_task_target)
138 progress += sprintf(progress, "ompt_task_target");
139 if (type & ompt_task_taskwait)
140 progress += sprintf(progress, "ompt_task_taskwait");
141 if (type & ompt_task_undeferred)
142 progress += sprintf(progress, "|ompt_task_undeferred");
143 if (type & ompt_task_untied)
144 progress += sprintf(progress, "|ompt_task_untied");
145 if (type & ompt_task_final)
146 progress += sprintf(progress, "|ompt_task_final");
147 if (type & ompt_task_mergeable)
148 progress += sprintf(progress, "|ompt_task_mergeable");
149 if (type & ompt_task_merged)
150 progress += sprintf(progress, "|ompt_task_merged");
151}
152
153static ompt_set_callback_t ompt_set_callback;
154static ompt_get_callback_t ompt_get_callback;
155static ompt_get_state_t ompt_get_state;
156static ompt_get_task_info_t ompt_get_task_info;
157static ompt_get_task_memory_t ompt_get_task_memory;
158static ompt_get_thread_data_t ompt_get_thread_data;
159static ompt_get_parallel_info_t ompt_get_parallel_info;
160static ompt_get_unique_id_t ompt_get_unique_id;
161static ompt_finalize_tool_t ompt_finalize_tool;
162static ompt_get_num_procs_t ompt_get_num_procs;
163static ompt_get_num_places_t ompt_get_num_places;
164static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
165static ompt_get_place_num_t ompt_get_place_num;
166static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
167static ompt_get_proc_id_t ompt_get_proc_id;
168static ompt_enumerate_states_t ompt_enumerate_states;
169static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
170
171void assert_frame_flags(int enterf, int exitf) {
172 if (!(enterf == (ompt_frame_application | ompt_frame_cfa) ||
173 enterf == (ompt_frame_runtime | ompt_frame_cfa))) {
174 printf("enter_frame_flags (%i) is invalid\n", enterf);
175 fflush(NULL);
176 }
177 if (!(exitf == (ompt_frame_application | ompt_frame_cfa) ||
178 exitf == (ompt_frame_runtime | ompt_frame_cfa))) {
179 printf("exit_frame_flags (%i) is invalid\n", exitf);
180 fflush(NULL);
181 }
182 assert(enterf == (ompt_frame_application | ompt_frame_cfa) ||
183 enterf == (ompt_frame_runtime | ompt_frame_cfa));
184 assert(exitf == (ompt_frame_application | ompt_frame_cfa) ||
185 exitf == (ompt_frame_runtime | ompt_frame_cfa));
186}
187static void print_ids(int level) {
188 int task_type, thread_num;
189 ompt_frame_t *frame;
190 ompt_data_t *task_parallel_data;
191 ompt_data_t *task_data;
192 int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
193 &task_parallel_data, &thread_num);
194 char buffer[2048];
195 format_task_type(task_type, buffer);
196 if (frame) {
197 printf("%" PRIu64 ": task level %d: parallel_id=%" PRIx64
198 ", task_id=%" PRIx64 ", exit_frame=%p, reenter_frame=%p, "
199 "task_type=%s=%d, thread_num=%d\n",
201 exists_task ? task_parallel_data->value : 0,
202 exists_task ? task_data->value : 0, frame->exit_frame.ptr,
203 frame->enter_frame.ptr, buffer, task_type, thread_num);
204 assert_frame_flags(frame->enter_frame_flags, frame->exit_frame_flags);
205 }
206}
207
// Frame address `level` call frames above the current function; level 0 is
// the current function's own frame.
#define get_frame_address(level) __builtin_frame_address(level)

// Prints "<thread id>: __builtin_frame_address(<level>)=<address>" for the
// calling function.
#define print_frame(level)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value, level, get_frame_address(level))
213
// Only tests that define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN get
// print_frame_from_outlined_fn(). clang (version 5.0 and above) adds an
// intermediate function call when compiling with the debug flag (-g), so in
// that configuration (signalled through -DDEBUG) the frame of interest is one
// level further up.
#ifdef TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN

#if defined(__clang__) && __clang_major__ >= 5
#warning                                                                       \
    "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
#warning                                                                       \
    "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
#endif

#if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
#define print_frame_from_outlined_fn(level) print_frame(level + 1)
#else
#define print_frame_from_outlined_fn(level) print_frame(level)
#endif

#endif
230
// Yields the address of the label that define_ompt_label(id) created. This
// uses the GNU "labels as values" extension (&&label), which has been tested
// with gcc, clang and icc.
#define get_ompt_label_address(id) (&&ompt_label_##id)

// Plants the label ompt_label_<id> at the current position so that the
// address of this spot in the code can be taken.
//
// For print_current_address(): a NOP is emitted in front of the label and
// serves as the jump target. Without it the compiler would insert an
// instruction we cannot control, and the distance between the label and the
// real return address would be unknown. The NOP's length depends on the
// target; see the per-architecture notes at print_possible_return_addresses.
//
// (The empty block in front of the __asm__ statement is a workaround for a
// bug in the Intel Compiler when the macro directly follows a
// "#pragma omp ..." line.)
#define define_ompt_label(id)                                                  \
  {                                                                            \
  }                                                                            \
  __asm__("nop");                                                              \
  ompt_label_##id:

// Prints the exact address that a previously called runtime function returns
// to, as a list of candidates computed from the label address.
#define print_current_address(id)                                              \
  define_ompt_label(id)                                                        \
  print_possible_return_addresses(get_ompt_label_address(id))
258
259#if KMP_ARCH_X86 || KMP_ARCH_X86_64
260// On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
261// a MOV instruction for non-void runtime functions which is 3 bytes long.
262#define print_possible_return_addresses(addr) \
263 printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
264 ompt_get_thread_data()->value, ((char *)addr) - 1, \
265 ((char *)addr) - 4)
266#elif KMP_ARCH_PPC64
267// On Power the NOP instruction is 4 bytes long. In addition, the compiler
268// inserts a second NOP instruction (another 4 bytes). For non-void runtime
269// functions Clang inserts a STW instruction (but only if compiling under
270// -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
271#define print_possible_return_addresses(addr) \
272 printf("%" PRIu64 ": current_address=%p or %p\n", \
273 ompt_get_thread_data()->value, ((char *)addr) - 8, \
274 ((char *)addr) - 12)
275#elif KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32
276// On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
277// store instruction (another 4 bytes long).
278// FIXME: PR #65696 addded a third possibility (12 byte offset) to make the
279// tests pass on Darwin. Adding the same for other OSes. However, the proper
280// fix for this is to remove the extra branch instruction being generated by
281// the AArch64 backend. See issue #69627.
282#define print_possible_return_addresses(addr) \
283 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
284 ompt_get_thread_data()->value, ((char *)addr) - 4, \
285 ((char *)addr) - 8, ((char *)addr) - 12)
286#elif KMP_ARCH_RISCV64
287#if __riscv_compressed
288// On RV64GC the C.NOP instruction is 2 byte long. In addition, the compiler
289// inserts a J instruction (targeting the successor basic block), which
290// accounts for another 4 bytes. Finally, an additional J instruction may
291// appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (ie.
292// another branch).
293#define print_possible_return_addresses(addr) \
294 printf("%" PRIu64 ": current_address=%p or %p\n", \
295 ompt_get_thread_data()->value, ((char *)addr) - 6, \
296 ((char *)addr) - 10)
297#else
298// On RV64G the NOP instruction is 4 byte long. In addition, the compiler
299// inserts a J instruction (targeting the successor basic block), which
300// accounts for another 4 bytes. Finally, an additional J instruction may
301// appear (adding 4 more bytes) when the NOP is referenced elsewhere (ie.
302// another branch).
303#define print_possible_return_addresses(addr) \
304 printf("%" PRIu64 ": current_address=%p or %p\n", \
305 ompt_get_thread_data()->value, ((char *)addr) - 8, \
306 ((char *)addr) - 12)
307#endif
308#elif KMP_ARCH_LOONGARCH64
309// On LoongArch64 the NOP instruction is 4 bytes long, can be followed by
310// inserted jump instruction (another 4 bytes long). And an additional jump
311// instruction may appear (adding 4 more bytes) when the NOP is referenced
312// elsewhere (ie. another branch).
313#define print_possible_return_addresses(addr) \
314 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
315 ompt_get_thread_data()->value, ((char *)addr) - 4, \
316 ((char *)addr) - 8, ((char *)addr) - 12)
317#elif KMP_ARCH_VE
318// On VE the NOP instruction is 8 byte long. In addition, the compiler inserts
319// a ??? instruction for non-void runtime functions which is ? bytes long.
320#define print_possible_return_addresses(addr) \
321 printf("%" PRIu64 ": current_address=%p or %p\n", \
322 ompt_get_thread_data()->value, ((char *)addr) - 8, \
323 ((char *)addr) - 8)
324#elif KMP_ARCH_S390X
325// On s390x the NOP instruction is 2 bytes long. For non-void runtime
326// functions Clang inserts a STY instruction (but only if compiling under
327// -fno-PIC which will be the default with Clang 8.0, another 6 bytes).
328//
329// Another possibility is:
330//
331// brasl %r14,__kmpc_end_master@plt
332// a7 f4 00 02 j 0f
333// 47 00 00 00 0: nop
334// a7 f4 00 02 j addr
335// addr:
336#define print_possible_return_addresses(addr) \
337 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
338 ompt_get_thread_data()->value, ((char *)addr) - 2, \
339 ((char *)addr) - 8, ((char *)addr) - 12)
340#elif KMP_ARCH_SPARC
341// FIXME: Need to distinguish between 32 and 64-bit SPARC?
342// On SPARC the NOP instruction is 4 bytes long.
343// FIXME: Explain. Can use __builtin_frob_return_addr?
344#define print_possible_return_addresses(addr) \
345 printf("%" PRIu64 ": current_address=%p or %p\n", \
346 ompt_get_thread_data()->value, ((char *)addr) - 12, \
347 (char *)addr - 20)
348#else
349#error Unsupported target architecture, cannot determine address offset!
350#endif
351
// This macro performs a somewhat similar job to print_current_address(),
// except that it discards a certain number of nibbles from the address and
// only prints the most significant bits / nibbles. This can be used for cases
// where the return address can only be approximated.
//
// To account for overflows (i.e. the most significant bits / nibbles have just
// changed because we are a few bytes above the relevant power of two), the
// truncated address is printed together with its neighbouring blocks: one
// block below, the block itself, and the two blocks above.
#define print_fuzzy_address(id)                                                \
  define_ompt_label(id) print_fuzzy_address_blocks(get_ompt_label_address(id))

// If you change this define you need to adapt all capture patterns in the
// tests to include or discard the new number of nibbles!
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))

// `addr` is parenthesized so that an expression argument is converted as a
// whole, and the pointer goes through uintptr_t before being widened to
// uint64_t, so the widening is a plain unsigned extension even where pointers
// are narrower than 64 bits.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES - 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES,          \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 2,      \
         (addr))
375
// Registers on_<name> as the handler for the OMPT event `name`.
//
// The handler is first bound to a local of the callback's own function-pointer
// type `type`, so a handler whose signature does not match is diagnosed by the
// compiler before the cast to the generic ompt_callback_t. A message is
// printed when the runtime answers ompt_set_never.
#define register_ompt_callback_t(name, type)                                   \
  do {                                                                         \
    type handler_##name = on_##name;                                           \
    if (ompt_set_never ==                                                      \
        ompt_set_callback(name, (ompt_callback_t)handler_##name)) {            \
      printf("0: Could not register callback '" #name "'\n");                  \
    }                                                                          \
  } while (0)

// Shorthand for events whose callback type is spelled <name>_t.
#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
384
385#ifndef USE_PRIVATE_TOOL
386static void on_ompt_callback_mutex_acquire(ompt_mutex_t kind, unsigned int hint,
387 unsigned int impl,
388 ompt_wait_id_t wait_id,
389 const void *codeptr_ra) {
390 switch (kind) {
391 case ompt_mutex_lock:
392 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_wait_lock: wait_id=%" PRIu64
393 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
394 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
395 break;
396 case ompt_mutex_test_lock:
397 printf("%" PRIu64 ":" _TOOL_PREFIX
398 " ompt_event_wait_test_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
399 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
400 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
401 break;
402 case ompt_mutex_nest_lock:
403 printf("%" PRIu64 ":" _TOOL_PREFIX
404 " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
405 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
406 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
407 break;
408 case ompt_mutex_test_nest_lock:
409 printf("%" PRIu64 ":" _TOOL_PREFIX
410 " ompt_event_wait_test_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
411 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
412 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
413 break;
414 case ompt_mutex_critical:
415 printf("%" PRIu64 ":" _TOOL_PREFIX
416 " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
417 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
418 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
419 break;
420 case ompt_mutex_atomic:
421 printf("%" PRIu64 ":" _TOOL_PREFIX
422 " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
423 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
424 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
425 break;
426 case ompt_mutex_ordered:
427 printf("%" PRIu64 ":" _TOOL_PREFIX
428 " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
429 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
430 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
431 break;
432 default:
433 break;
434 }
435}
436
437static void on_ompt_callback_mutex_acquired(ompt_mutex_t kind,
438 ompt_wait_id_t wait_id,
439 const void *codeptr_ra) {
440 switch (kind) {
441 case ompt_mutex_lock:
442 printf("%" PRIu64 ":" _TOOL_PREFIX
443 " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
444 ompt_get_thread_data()->value, wait_id, codeptr_ra);
445 break;
446 case ompt_mutex_test_lock:
447 printf("%" PRIu64 ":" _TOOL_PREFIX
448 " ompt_event_acquired_test_lock: wait_id=%" PRIu64
449 ", codeptr_ra=%p \n",
450 ompt_get_thread_data()->value, wait_id, codeptr_ra);
451 break;
452 case ompt_mutex_nest_lock:
453 printf("%" PRIu64 ":" _TOOL_PREFIX
454 " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
455 ", codeptr_ra=%p \n",
456 ompt_get_thread_data()->value, wait_id, codeptr_ra);
457 break;
458 case ompt_mutex_test_nest_lock:
459 printf("%" PRIu64 ":" _TOOL_PREFIX
460 " ompt_event_acquired_test_nest_lock_first: wait_id=%" PRIu64
461 ", codeptr_ra=%p \n",
462 ompt_get_thread_data()->value, wait_id, codeptr_ra);
463 break;
464 case ompt_mutex_critical:
465 printf("%" PRIu64 ":" _TOOL_PREFIX
466 " ompt_event_acquired_critical: wait_id=%" PRIu64
467 ", codeptr_ra=%p \n",
468 ompt_get_thread_data()->value, wait_id, codeptr_ra);
469 break;
470 case ompt_mutex_atomic:
471 printf("%" PRIu64 ":" _TOOL_PREFIX
472 " ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
473 ompt_get_thread_data()->value, wait_id, codeptr_ra);
474 break;
475 case ompt_mutex_ordered:
476 printf("%" PRIu64 ":" _TOOL_PREFIX
477 " ompt_event_acquired_ordered: wait_id=%" PRIu64
478 ", codeptr_ra=%p \n",
479 ompt_get_thread_data()->value, wait_id, codeptr_ra);
480 break;
481 default:
482 break;
483 }
484}
485
486static void on_ompt_callback_mutex_released(ompt_mutex_t kind,
487 ompt_wait_id_t wait_id,
488 const void *codeptr_ra) {
489 switch (kind) {
490 case ompt_mutex_lock:
491 printf("%" PRIu64 ":" _TOOL_PREFIX
492 " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
493 ompt_get_thread_data()->value, wait_id, codeptr_ra);
494 break;
495 case ompt_mutex_nest_lock:
496 printf("%" PRIu64 ":" _TOOL_PREFIX
497 " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
498 ", codeptr_ra=%p \n",
499 ompt_get_thread_data()->value, wait_id, codeptr_ra);
500 break;
501 case ompt_mutex_critical:
502 printf("%" PRIu64 ":" _TOOL_PREFIX
503 " ompt_event_release_critical: wait_id=%" PRIu64
504 ", codeptr_ra=%p \n",
505 ompt_get_thread_data()->value, wait_id, codeptr_ra);
506 break;
507 case ompt_mutex_atomic:
508 printf("%" PRIu64 ":" _TOOL_PREFIX
509 " ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
510 ompt_get_thread_data()->value, wait_id, codeptr_ra);
511 break;
512 case ompt_mutex_ordered:
513 printf("%" PRIu64 ":" _TOOL_PREFIX
514 " ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
515 ompt_get_thread_data()->value, wait_id, codeptr_ra);
516 break;
517 default:
518 break;
519 }
520}
521
522static void on_ompt_callback_nest_lock(ompt_scope_endpoint_t endpoint,
523 ompt_wait_id_t wait_id,
524 const void *codeptr_ra) {
525 switch (endpoint) {
526 case ompt_scope_begin:
527 printf("%" PRIu64 ":" _TOOL_PREFIX
528 " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
529 ", codeptr_ra=%p \n",
530 ompt_get_thread_data()->value, wait_id, codeptr_ra);
531 break;
532 case ompt_scope_end:
533 printf("%" PRIu64 ":" _TOOL_PREFIX
534 " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
535 ", codeptr_ra=%p \n",
536 ompt_get_thread_data()->value, wait_id, codeptr_ra);
537 break;
538 case ompt_scope_beginend:
539 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
540 exit(-1);
541 }
542}
543
544static void on_ompt_callback_sync_region(ompt_sync_region_t kind,
545 ompt_scope_endpoint_t endpoint,
546 ompt_data_t *parallel_data,
547 ompt_data_t *task_data,
548 const void *codeptr_ra) {
549 if (endpoint == ompt_scope_beginend) {
550 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
551 exit(-1);
552 }
553 if (kind == ompt_sync_region_reduction) {
554 printf("ompt_sync_region_reduction should never be passed to %s\n",
555 __func__);
556 exit(-1);
557 }
558 uint64_t parallel_data_value = parallel_data ? parallel_data->value : 0;
559 const char *begin_or_end = (endpoint == ompt_scope_begin) ? "begin" : "end";
560 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_%s: parallel_id=%" PRIx64
561 ", task_id=%" PRIx64 ", codeptr_ra=%p\n",
563 begin_or_end, parallel_data_value, task_data->value, codeptr_ra);
564 switch (kind) {
565 case ompt_sync_region_barrier:
566 case ompt_sync_region_barrier_implicit:
567 case ompt_sync_region_barrier_implicit_workshare:
568 case ompt_sync_region_barrier_implicit_parallel:
569 case ompt_sync_region_barrier_teams:
570 case ompt_sync_region_barrier_explicit:
571 case ompt_sync_region_barrier_implementation:
572 if (endpoint == ompt_scope_begin)
573 print_ids(0);
574 default:;
575 }
576}
577
578static void on_ompt_callback_sync_region_wait(ompt_sync_region_t kind,
579 ompt_scope_endpoint_t endpoint,
580 ompt_data_t *parallel_data,
581 ompt_data_t *task_data,
582 const void *codeptr_ra) {
583 if (endpoint == ompt_scope_beginend) {
584 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
585 exit(-1);
586 }
587 if (kind == ompt_sync_region_reduction) {
588 printf("ompt_sync_region_reduction should never be passed to %s\n",
589 __func__);
590 exit(-1);
591 }
592 uint64_t parallel_data_value = parallel_data ? parallel_data->value : 0;
593 const char *begin_or_end = (endpoint == ompt_scope_begin) ? "begin" : "end";
594 printf("%" PRIu64 ":" _TOOL_PREFIX
595 " ompt_event_wait_%s_%s: parallel_id=%" PRIx64 ", task_id=%" PRIx64
596 ", codeptr_ra=%p\n",
598 begin_or_end, parallel_data_value, task_data->value, codeptr_ra);
599}
600
601static void on_ompt_callback_reduction(ompt_sync_region_t kind,
602 ompt_scope_endpoint_t endpoint,
603 ompt_data_t *parallel_data,
604 ompt_data_t *task_data,
605 const void *codeptr_ra) {
606 switch (endpoint) {
607 case ompt_scope_begin:
608 printf("%" PRIu64 ":" _TOOL_PREFIX
609 " ompt_event_reduction_begin: parallel_id=%" PRIx64
610 ", task_id=%" PRIx64 ", codeptr_ra=%p\n",
612 (parallel_data) ? parallel_data->value : 0, task_data->value,
613 codeptr_ra);
614 break;
615 case ompt_scope_end:
616 printf("%" PRIu64 ":" _TOOL_PREFIX
617 " ompt_event_reduction_end: parallel_id=%" PRIx64
618 ", task_id=%" PRIx64 ", codeptr_ra=%p\n",
620 (parallel_data) ? parallel_data->value : 0, task_data->value,
621 codeptr_ra);
622 break;
623 case ompt_scope_beginend:
624 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
625 exit(-1);
626 }
627}
628
629static void on_ompt_callback_flush(ompt_data_t *thread_data,
630 const void *codeptr_ra) {
631 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
632 thread_data->value, codeptr_ra);
633}
634
635static void on_ompt_callback_cancel(ompt_data_t *task_data, int flags,
636 const void *codeptr_ra) {
637 const char *first_flag_value;
638 const char *second_flag_value;
639 if (flags & ompt_cancel_parallel)
640 first_flag_value = ompt_cancel_flag_t_values[0];
641 else if (flags & ompt_cancel_sections)
642 first_flag_value = ompt_cancel_flag_t_values[1];
643 else if (flags & ompt_cancel_loop)
644 first_flag_value = ompt_cancel_flag_t_values[2];
645 else if (flags & ompt_cancel_taskgroup)
646 first_flag_value = ompt_cancel_flag_t_values[3];
647
648 if (flags & ompt_cancel_activated)
649 second_flag_value = ompt_cancel_flag_t_values[4];
650 else if (flags & ompt_cancel_detected)
651 second_flag_value = ompt_cancel_flag_t_values[5];
652 else if (flags & ompt_cancel_discarded_task)
653 second_flag_value = ompt_cancel_flag_t_values[6];
654
655 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_id=%" PRIx64
656 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
657 ompt_get_thread_data()->value, task_data->value, first_flag_value,
658 second_flag_value, flags, codeptr_ra);
659}
660
661static void on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,
662 ompt_data_t *parallel_data,
663 ompt_data_t *task_data,
664 unsigned int team_size,
665 unsigned int thread_num, int flags) {
666 switch (endpoint) {
667 case ompt_scope_begin:
668 if (task_data->ptr)
669 printf("%s\n", "0: task_data initially not null");
670 task_data->value = ompt_get_unique_id();
671
672 // there is no parallel_begin callback for implicit parallel region
673 // thus it is initialized in initial task
674 if (flags & ompt_task_initial) {
675 char buffer[2048];
676
677 format_task_type(flags, buffer);
678 // Only check initial task not created by teams construct
679 if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
680 printf("%s\n", "0: parallel_data initially not null");
681 parallel_data->value = ompt_get_unique_id();
682 printf("%" PRIu64 ":" _TOOL_PREFIX
683 " ompt_event_initial_task_begin: parallel_id=%" PRIx64
684 ", task_id=%" PRIx64 ", actual_parallelism=%" PRIu32
685 ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
686 ompt_get_thread_data()->value, parallel_data->value,
687 task_data->value, team_size, thread_num, flags);
688 } else {
689 printf("%" PRIu64 ":" _TOOL_PREFIX
690 " ompt_event_implicit_task_begin: parallel_id=%" PRIx64
691 ", task_id=%" PRIx64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32
692 "\n",
693 ompt_get_thread_data()->value, parallel_data->value,
694 task_data->value, team_size, thread_num);
695 }
696
697 break;
698 case ompt_scope_end:
699 if (flags & ompt_task_initial) {
700 printf("%" PRIu64 ":" _TOOL_PREFIX
701 " ompt_event_initial_task_end: parallel_id=%" PRIx64
702 ", task_id=%" PRIx64 ", actual_parallelism=%" PRIu32
703 ", index=%" PRIu32 "\n",
705 (parallel_data) ? parallel_data->value : 0, task_data->value,
706 team_size, thread_num);
707 } else {
708 printf("%" PRIu64 ":" _TOOL_PREFIX
709 " ompt_event_implicit_task_end: parallel_id=%" PRIx64
710 ", task_id=%" PRIx64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32
711 "\n",
713 (parallel_data) ? parallel_data->value : 0, task_data->value,
714 team_size, thread_num);
715 }
716 break;
717 case ompt_scope_beginend:
718 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
719 exit(-1);
720 }
721}
722
723static void on_ompt_callback_lock_init(ompt_mutex_t kind, unsigned int hint,
724 unsigned int impl,
725 ompt_wait_id_t wait_id,
726 const void *codeptr_ra) {
727 switch (kind) {
728 case ompt_mutex_lock:
729 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_init_lock: wait_id=%" PRIu64
730 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
731 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
732 break;
733 case ompt_mutex_nest_lock:
734 printf("%" PRIu64 ":" _TOOL_PREFIX
735 " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
736 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
737 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
738 break;
739 default:
740 break;
741 }
742}
743
744static void on_ompt_callback_lock_destroy(ompt_mutex_t kind,
745 ompt_wait_id_t wait_id,
746 const void *codeptr_ra) {
747 switch (kind) {
748 case ompt_mutex_lock:
749 printf("%" PRIu64 ":" _TOOL_PREFIX
750 " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
751 ompt_get_thread_data()->value, wait_id, codeptr_ra);
752 break;
753 case ompt_mutex_nest_lock:
754 printf("%" PRIu64 ":" _TOOL_PREFIX
755 " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
756 ", codeptr_ra=%p \n",
757 ompt_get_thread_data()->value, wait_id, codeptr_ra);
758 break;
759 default:
760 break;
761 }
762}
763
764static void on_ompt_callback_work(ompt_work_t wstype,
765 ompt_scope_endpoint_t endpoint,
766 ompt_data_t *parallel_data,
767 ompt_data_t *task_data, uint64_t count,
768 const void *codeptr_ra) {
769 switch (endpoint) {
770 case ompt_scope_begin:
771 printf("%" PRIu64 ":" _TOOL_PREFIX " %s_begin: parallel_id=%" PRIx64
772 ", task_id=%" PRIx64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
774 parallel_data->value, task_data->value, codeptr_ra, count);
775 break;
776 case ompt_scope_end:
777 printf("%" PRIu64 ":" _TOOL_PREFIX " %s_end: parallel_id=%" PRIx64
778 ", task_id=%" PRIx64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
780 parallel_data->value, task_data->value, codeptr_ra, count);
781 break;
782 case ompt_scope_beginend:
783 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
784 exit(-1);
785 }
786}
787
788static void on_ompt_callback_dispatch(ompt_data_t *parallel_data,
789 ompt_data_t *task_data,
790 ompt_dispatch_t kind,
791 ompt_data_t instance) {
792 char *event_name = NULL;
793 void *codeptr_ra = NULL;
794 ompt_dispatch_chunk_t *dispatch_chunk = NULL;
795 switch (kind) {
796 case ompt_dispatch_section:
797 event_name = "ompt_event_section_begin";
798 codeptr_ra = instance.ptr;
799 break;
800 case ompt_dispatch_ws_loop_chunk:
801 event_name = "ompt_event_ws_loop_chunk_begin";
802 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
803 break;
804 case ompt_dispatch_taskloop_chunk:
805 event_name = "ompt_event_taskloop_chunk_begin";
806 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
807 break;
808 case ompt_dispatch_distribute_chunk:
809 event_name = "ompt_event_distribute_chunk_begin";
810 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
811 break;
812 default:
813 event_name = "ompt_ws_loop_iteration_begin";
814 }
815 printf("%" PRIu64 ":" _TOOL_PREFIX " %s: parallel_id=%" PRIx64
816 ", task_id=%" PRIx64 ", codeptr_ra=%p, chunk_start=%" PRIu64
817 ", chunk_iterations=%" PRIu64 "\n",
818 ompt_get_thread_data()->value, event_name, parallel_data->value,
819 task_data->value, codeptr_ra,
820 dispatch_chunk ? dispatch_chunk->start : 0,
821 dispatch_chunk ? dispatch_chunk->iterations : 0);
822}
823
824static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
825 ompt_data_t *parallel_data,
826 ompt_data_t *task_data,
827 const void *codeptr_ra) {
828 switch (endpoint) {
829 case ompt_scope_begin:
830 printf("%" PRIu64 ":" _TOOL_PREFIX
831 " ompt_event_masked_begin: parallel_id=%" PRIx64 ", task_id=%" PRIx64
832 ", codeptr_ra=%p\n",
833 ompt_get_thread_data()->value, parallel_data->value,
834 task_data->value, codeptr_ra);
835 break;
836 case ompt_scope_end:
837 printf("%" PRIu64 ":" _TOOL_PREFIX
838 " ompt_event_masked_end: parallel_id=%" PRIx64 ", task_id=%" PRIx64
839 ", codeptr_ra=%p\n",
840 ompt_get_thread_data()->value, parallel_data->value,
841 task_data->value, codeptr_ra);
842 break;
843 case ompt_scope_beginend:
844 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
845 exit(-1);
846 }
847}
848
850 ompt_data_t *encountering_task_data,
851 const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
852 uint32_t requested_team_size, int flag, const void *codeptr_ra) {
853 if (parallel_data->ptr)
854 printf("0: parallel_data initially not null\n");
855 parallel_data->value = ompt_get_unique_id();
856 int invoker = flag & 0xF;
857 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
858 const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams";
859 printf("%" PRIu64 ":" _TOOL_PREFIX
860 " ompt_event_%s_begin: parent_task_id=%" PRIx64
861 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
862 "parallel_id=%" PRIx64 ", requested_%s=%" PRIu32
863 ", codeptr_ra=%p, invoker=%d\n",
864 ompt_get_thread_data()->value, event, encountering_task_data->value,
865 encountering_task_frame->exit_frame.ptr,
866 encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
867 requested_team_size, codeptr_ra, invoker);
868}
869
870static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
871 ompt_data_t *encountering_task_data,
872 int flag, const void *codeptr_ra) {
873 int invoker = flag & 0xF;
874 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
875 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: parallel_id=%" PRIx64
876 ", task_id=%" PRIx64 ", invoker=%d, codeptr_ra=%p\n",
877 ompt_get_thread_data()->value, event, parallel_data->value,
878 encountering_task_data->value, invoker, codeptr_ra);
879}
880
881static void
882on_ompt_callback_task_create(ompt_data_t *encountering_task_data,
883 const ompt_frame_t *encountering_task_frame,
884 ompt_data_t *new_task_data, int type,
885 int has_dependences, const void *codeptr_ra) {
886 if (new_task_data->ptr)
887 printf("0: new_task_data initially not null\n");
888 new_task_data->value = ompt_get_unique_id();
889 char buffer[2048];
890
891 format_task_type(type, buffer);
892
893 printf(
894 "%" PRIu64 ":" _TOOL_PREFIX
895 " ompt_event_task_create: parent_task_id=%" PRIx64
896 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
897 "new_task_id=%" PRIx64
898 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
900 encountering_task_data ? encountering_task_data->value : 0,
901 encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
902 encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
903 new_task_data->value, codeptr_ra, buffer, type,
904 has_dependences ? "yes" : "no");
905}
906
907static void on_ompt_callback_task_schedule(ompt_data_t *first_task_data,
908 ompt_task_status_t prior_task_status,
909 ompt_data_t *second_task_data) {
910 printf("%" PRIu64 ":" _TOOL_PREFIX
911 " ompt_event_task_schedule: first_task_id=%" PRIx64
912 ", second_task_id=%" PRIx64 ", prior_task_status=%s=%d\n",
913 ompt_get_thread_data()->value, first_task_data->value,
914 (second_task_data ? second_task_data->value : -1),
915 ompt_task_status_t_values[prior_task_status], prior_task_status);
916 if (prior_task_status == ompt_task_complete ||
917 prior_task_status == ompt_task_late_fulfill ||
918 prior_task_status == ompt_taskwait_complete) {
919 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIx64
920 "\n",
921 ompt_get_thread_data()->value, first_task_data->value);
922 }
923}
924
925static void on_ompt_callback_dependences(ompt_data_t *task_data,
926 const ompt_dependence_t *deps,
927 int ndeps) {
928 char buffer[2048];
929 char *progress = buffer;
930 int i;
931 for (i = 0; i < ndeps && progress < buffer + 2000; i++) {
932 if (deps[i].dependence_type == ompt_dependence_type_source ||
933 deps[i].dependence_type == ompt_dependence_type_sink)
934 progress +=
935 sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
936 ompt_dependence_type_t_values[deps[i].dependence_type]);
937 else
938 progress +=
939 sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
940 ompt_dependence_type_t_values[deps[i].dependence_type]);
941 }
942 if (ndeps > 0)
943 progress[-2] = 0;
944 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIx64
945 ", deps=[%s], ndeps=%d\n",
946 ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
947}
948
949static void on_ompt_callback_task_dependence(ompt_data_t *first_task_data,
950 ompt_data_t *second_task_data) {
951 printf("%" PRIu64 ":" _TOOL_PREFIX
952 " ompt_event_task_dependence_pair: first_task_id=%" PRIx64
953 ", second_task_id=%" PRIx64 "\n",
954 ompt_get_thread_data()->value, first_task_data->value,
955 second_task_data->value);
956}
957
958static void on_ompt_callback_thread_begin(ompt_thread_t thread_type,
959 ompt_data_t *thread_data) {
960 if (thread_data->ptr)
961 printf("%s\n", "0: thread_data initially not null");
962 thread_data->value = ompt_get_unique_id();
963 printf("%" PRIu64 ":" _TOOL_PREFIX
964 " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
966 thread_type, thread_data->value);
967}
968
969static void on_ompt_callback_thread_end(ompt_data_t *thread_data) {
970 printf("%" PRIu64 ":" _TOOL_PREFIX
971 " ompt_event_thread_end: thread_id=%" PRIu64 "\n",
972 ompt_get_thread_data()->value, thread_data->value);
973}
974
975static int on_ompt_callback_control_tool(uint64_t command, uint64_t modifier,
976 void *arg, const void *codeptr_ra) {
977 ompt_frame_t *omptTaskFrame;
978 ompt_get_task_info(0, NULL, (ompt_data_t **)NULL, &omptTaskFrame, NULL, NULL);
979 printf("%" PRIu64 ":" _TOOL_PREFIX
980 " ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64
981 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
982 "current_task_frame.reenter=%p \n",
983 ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
984 omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
985
986 // the following would interfere with expected output for OMPT tests, so skip
987#ifndef _OMPT_TESTS
988 // print task data
989 int task_level = 0;
990 ompt_data_t *task_data;
991 while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
992 NULL, NULL)) {
993 printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIx64 "\n",
994 ompt_get_thread_data()->value, task_level, task_data->value);
995 task_level++;
996 }
997
998 // print parallel data
999 int parallel_level = 0;
1000 ompt_data_t *parallel_data;
1001 while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)&parallel_data,
1002 NULL)) {
1003 printf("%" PRIu64 ":" _TOOL_PREFIX
1004 " parallel level %d: parallel_id=%" PRIx64 "\n",
1005 ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1006 parallel_level++;
1007 }
1008#endif
1009 return 0; // success
1010}
1011
1012static void on_ompt_callback_error(ompt_severity_t severity,
1013 const char *message, size_t length,
1014 const void *codeptr_ra) {
1015 printf("%" PRIu64 ": ompt_event_runtime_error: severity=%" PRIu32
1016 ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n",
1017 ompt_get_thread_data()->value, severity, message, (uint64_t)length,
1018 codeptr_ra);
1019}
1020
1021int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num,
1022 ompt_data_t *tool_data) {
1023 ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
1024 ompt_get_callback = (ompt_get_callback_t)lookup("ompt_get_callback");
1025 ompt_get_state = (ompt_get_state_t)lookup("ompt_get_state");
1026 ompt_get_task_info = (ompt_get_task_info_t)lookup("ompt_get_task_info");
1027 ompt_get_task_memory = (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
1028 ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
1030 (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
1031 ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
1032 ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");
1033
1035
1036 ompt_get_num_procs = (ompt_get_num_procs_t)lookup("ompt_get_num_procs");
1037 ompt_get_num_places = (ompt_get_num_places_t)lookup("ompt_get_num_places");
1039 (ompt_get_place_proc_ids_t)lookup("ompt_get_place_proc_ids");
1040 ompt_get_place_num = (ompt_get_place_num_t)lookup("ompt_get_place_num");
1042 (ompt_get_partition_place_nums_t)lookup("ompt_get_partition_place_nums");
1043 ompt_get_proc_id = (ompt_get_proc_id_t)lookup("ompt_get_proc_id");
1045 (ompt_enumerate_states_t)lookup("ompt_enumerate_states");
1047 (ompt_enumerate_mutex_impls_t)lookup("ompt_enumerate_mutex_impls");
1048
1049 register_ompt_callback(ompt_callback_mutex_acquire);
1050 register_ompt_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
1051 register_ompt_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
1052 register_ompt_callback(ompt_callback_nest_lock);
1053 register_ompt_callback(ompt_callback_sync_region);
1054 register_ompt_callback_t(ompt_callback_sync_region_wait,
1055 ompt_callback_sync_region_t);
1056 register_ompt_callback_t(ompt_callback_reduction,
1057 ompt_callback_sync_region_t);
1058 register_ompt_callback(ompt_callback_control_tool);
1059 register_ompt_callback(ompt_callback_flush);
1060 register_ompt_callback(ompt_callback_cancel);
1061 register_ompt_callback(ompt_callback_implicit_task);
1062 register_ompt_callback_t(ompt_callback_lock_init,
1063 ompt_callback_mutex_acquire_t);
1064 register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
1065 register_ompt_callback(ompt_callback_work);
1066 register_ompt_callback(ompt_callback_dispatch);
1067 register_ompt_callback(ompt_callback_masked);
1068 register_ompt_callback(ompt_callback_parallel_begin);
1069 register_ompt_callback(ompt_callback_parallel_end);
1070 register_ompt_callback(ompt_callback_task_create);
1071 register_ompt_callback(ompt_callback_task_schedule);
1072 register_ompt_callback(ompt_callback_dependences);
1073 register_ompt_callback(ompt_callback_task_dependence);
1074 register_ompt_callback(ompt_callback_thread_begin);
1075 register_ompt_callback(ompt_callback_thread_end);
1076 register_ompt_callback(ompt_callback_error);
1077 printf("0: NULL_POINTER=%p\n", (void *)NULL);
1078 return 1; // success
1079}
1080
1081void ompt_finalize(ompt_data_t *tool_data) {
1082 printf("0: ompt_event_runtime_shutdown\n");
1083}
1084
1085#ifdef __cplusplus
1086extern "C" {
1087#endif
1089 const char *runtime_version) {
1091 &ompt_finalize, 0};
1092 return &ompt_start_tool_result;
1093}
1094#ifdef __cplusplus
1095}
1096#endif
1097#endif // ifndef USE_PRIVATE_TOOL
1098#ifdef _OMPT_TESTS
1099#undef _OMPT_TESTS
1100#endif
static void on_ompt_callback_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition: callback.h:486
static void on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, unsigned int team_size, unsigned int thread_num, int flags)
Definition: callback.h:661
static const char * ompt_thread_t_values[]
Definition: callback.h:27
static void print_ids(int level)
Definition: callback.h:187
static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data, ompt_data_t *encountering_task_data, int flag, const void *codeptr_ra)
Definition: callback.h:870
static void on_ompt_callback_flush(ompt_data_t *thread_data, const void *codeptr_ra)
Definition: callback.h:629
static ompt_get_task_info_t ompt_get_task_info
Definition: callback.h:156
static ompt_enumerate_states_t ompt_enumerate_states
Definition: callback.h:168
static const char * ompt_work_events_t_values[]
Definition: callback.h:63
static void on_ompt_callback_lock_destroy(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition: callback.h:744
static void on_ompt_callback_task_schedule(ompt_data_t *first_task_data, ompt_task_status_t prior_task_status, ompt_data_t *second_task_data)
Definition: callback.h:907
static ompt_get_callback_t ompt_get_callback
Definition: callback.h:154
static void format_task_type(int type, char *buffer)
Definition: callback.h:129
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids
Definition: callback.h:164
static void on_ompt_callback_lock_init(ompt_mutex_t kind, unsigned int hint, unsigned int impl, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition: callback.h:723
static ompt_set_callback_t ompt_set_callback
Definition: callback.h:153
static ompt_get_place_num_t ompt_get_place_num
Definition: callback.h:165
#define register_ompt_callback(name)
Definition: callback.h:383
static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
Definition: callback.h:824
static void on_ompt_callback_dependences(ompt_data_t *task_data, const ompt_dependence_t *deps, int ndeps)
Definition: callback.h:925
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums
Definition: callback.h:166
static void on_ompt_callback_task_create(ompt_data_t *encountering_task_data, const ompt_frame_t *encountering_task_frame, ompt_data_t *new_task_data, int type, int has_dependences, const void *codeptr_ra)
Definition: callback.h:882
static void on_ompt_callback_error(ompt_severity_t severity, const char *message, size_t length, const void *codeptr_ra)
Definition: callback.h:1012
static void on_ompt_callback_nest_lock(ompt_scope_endpoint_t endpoint, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition: callback.h:522
int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t *tool_data)
Definition: callback.h:1021
static void on_ompt_callback_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition: callback.h:437
static const char * ompt_dependence_type_t_values[36]
Definition: callback.h:78
static ompt_get_task_memory_t ompt_get_task_memory
Definition: callback.h:157
void assert_frame_flags(int enterf, int exitf)
Definition: callback.h:171
static int on_ompt_callback_control_tool(uint64_t command, uint64_t modifier, void *arg, const void *codeptr_ra)
Definition: callback.h:975
static const char * ompt_task_status_t_values[]
Definition: callback.h:31
void ompt_finalize(ompt_data_t *tool_data)
Definition: callback.h:1081
static void on_ompt_callback_task_dependence(ompt_data_t *first_task_data, ompt_data_t *second_task_data)
Definition: callback.h:949
static void on_ompt_callback_mutex_acquire(ompt_mutex_t kind, unsigned int hint, unsigned int impl, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition: callback.h:386
#define register_ompt_callback_t(name, type)
Definition: callback.h:376
static void on_ompt_callback_parallel_begin(ompt_data_t *encountering_task_data, const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, uint32_t requested_team_size, int flag, const void *codeptr_ra)
Definition: callback.h:849
static ompt_get_num_procs_t ompt_get_num_procs
Definition: callback.h:162
static void on_ompt_callback_thread_end(ompt_data_t *thread_data)
Definition: callback.h:969
static void on_ompt_callback_sync_region(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
Definition: callback.h:544
static void on_ompt_callback_work(ompt_work_t wstype, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, uint64_t count, const void *codeptr_ra)
Definition: callback.h:764
static ompt_finalize_tool_t ompt_finalize_tool
Definition: callback.h:161
static ompt_get_num_places_t ompt_get_num_places
Definition: callback.h:163
static ompt_get_unique_id_t ompt_get_unique_id
Definition: callback.h:160
static ompt_get_state_t ompt_get_state
Definition: callback.h:155
static const char * ompt_cancel_flag_t_values[]
Definition: callback.h:42
#define _TOOL_PREFIX
Definition: callback.h:22
static void on_ompt_callback_thread_begin(ompt_thread_t thread_type, ompt_data_t *thread_data)
Definition: callback.h:958
static void on_ompt_callback_sync_region_wait(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
Definition: callback.h:578
static void on_ompt_callback_dispatch(ompt_data_t *parallel_data, ompt_data_t *task_data, ompt_dispatch_t kind, ompt_data_t instance)
Definition: callback.h:788
static const char * ompt_work_t_values[]
Definition: callback.h:48
static const char * ompt_sync_region_t_values[]
Definition: callback.h:117
static void on_ompt_callback_reduction(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
Definition: callback.h:601
static ompt_get_parallel_info_t ompt_get_parallel_info
Definition: callback.h:159
static void on_ompt_callback_cancel(ompt_data_t *task_data, int flags, const void *codeptr_ra)
Definition: callback.h:635
static ompt_get_proc_id_t ompt_get_proc_id
Definition: callback.h:167
static ompt_get_thread_data_t ompt_get_thread_data
Definition: callback.h:158
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls
Definition: callback.h:169
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance * instance
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t length
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t 
__itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s const char ITT_FORMAT s __itt_frame frame
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
#define i
Definition: kmp_stub.cpp:87
struct ompt_start_tool_result_t ompt_start_tool_result_t
static ompt_start_tool_result_t * ompt_start_tool_result
#define ompt_start_tool
volatile int flag