LLVM OpenMP
callback.h
Go to the documentation of this file.
1#ifndef _BSD_SOURCE
2#define _BSD_SOURCE
3#endif
4#ifndef _DEFAULT_SOURCE
5#define _DEFAULT_SOURCE
6#endif
7#include <stdio.h>
8#ifndef __STDC_FORMAT_MACROS
9#define __STDC_FORMAT_MACROS
10#endif
11#include <inttypes.h>
12#include <omp.h>
13#include <omp-tools.h>
14#include "ompt-signal.h"
15#include <stdlib.h>
16#include <assert.h>
17
18// Used to detect architecture
20
21#ifndef _TOOL_PREFIX
22#define _TOOL_PREFIX ""
23// If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
24#define _OMPT_TESTS
25#endif
26
// Printable names for thread types, one entry per line.
// Presumably indexed by the numeric ompt_thread_t value (slot 0 being the
// "undefined" placeholder) -- confirm against the users of this table.
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED", // 0
    "ompt_thread_initial",   // 1
    "ompt_thread_worker",    // 2
    "ompt_thread_other",     // 3
};
30
// Printable names for task status values. The trailing numbers give the array
// index of each entry; slot 0 is the "undefined" placeholder.
static const char *ompt_task_status_t_values[] = {
    "ompt_task_UNDEFINED",     // 0
    "ompt_task_complete",      // 1
    "ompt_task_yield",         // 2
    "ompt_task_cancel",        // 3
    "ompt_task_detach",        // 4
    "ompt_task_early_fulfill", // 5
    "ompt_task_late_fulfill",  // 6
    "ompt_task_switch",        // 7
    "ompt_taskwait_complete",  // 8
};
// Printable names for cancel flags. This table is addressed by position, not
// by flag value: slots 0-3 name the cancelled construct and slots 4-6 name the
// cancellation state (see on_ompt_callback_cancel).
static const char *ompt_cancel_flag_t_values[] = {
    "ompt_cancel_parallel",       // 0
    "ompt_cancel_sections",       // 1
    "ompt_cancel_loop",           // 2
    "ompt_cancel_taskgroup",      // 3
    "ompt_cancel_activated",      // 4
    "ompt_cancel_detected",       // 5
    "ompt_cancel_discarded_task", // 6
};
47
// Printable "ompt_work_*" names for worksharing kinds. The trailing numbers
// give the array index of each entry; slot 0 is the "undefined" placeholder.
static const char *ompt_work_t_values[] = {
    "undefined",                 // 0
    "ompt_work_loop",            // 1
    "ompt_work_sections",        // 2
    "ompt_work_single_executor", // 3
    "ompt_work_single_other",    // 4
    "ompt_work_workshare",       // 5
    "ompt_work_distribute",      // 6
    "ompt_work_taskloop",        // 7
    "ompt_work_scope",           // 8
    "ompt_work_workdistribute",  // 9
    "ompt_work_loop_static",     // 10
    "ompt_work_loop_dynamic",    // 11
    "ompt_work_loop_guided",     // 12
    "ompt_work_loop_other",      // 13
};
62
// Event-name prefixes ("ompt_event_*") for worksharing kinds; entries line up
// one-to-one with ompt_work_t_values. Slot 0 is the "undefined" placeholder.
static const char *ompt_work_events_t_values[] = {
    "undefined",                  // 0
    "ompt_event_loop",            // 1
    "ompt_event_sections",        // 2
    "ompt_event_single_in_block", // 3
    "ompt_event_single_others",   // 4
    "ompt_event_workshare",       // 5
    "ompt_event_distribute",      // 6
    "ompt_event_taskloop",        // 7
    "ompt_event_scope",           // 8
    "ompt_event_workdistribute",  // 9
    "ompt_event_loop_static",     // 10
    "ompt_event_loop_dynamic",    // 11
    "ompt_event_loop_guided",     // 12
    "ompt_event_loop_other",      // 13
};
77
// Printable names for task dependence types. The table has 36 slots; slots
// 8-33 carry no name and are filled with empty strings so that the two
// "*_all_memory" names land on slots 34 and 35.
static const char *ompt_dependence_type_t_values[36] = {
    "ompt_dependence_type_UNDEFINED",     // 0
    "ompt_dependence_type_in",            // 1
    "ompt_dependence_type_out",           // 2
    "ompt_dependence_type_inout",         // 3
    "ompt_dependence_type_mutexinoutset", // 4
    "ompt_dependence_type_source",        // 5
    "ompt_dependence_type_sink",          // 6
    "ompt_dependence_type_inoutset",      // 7
    "", "", "", "", "", "", "", "", "", "", "", "", "", // 8-20
    "", "", "", "", "", "", "", "", "", "", "", "", "", // 21-33
    "ompt_dependence_type_out_all_memory",   // 34
    "ompt_dependence_type_inout_all_memory", // 35
};
116
// Short names for synchronization-region kinds, used to build event names
// such as "ompt_event_<name>_begin". The trailing numbers give the array index
// of each entry; slot 0 is the "undefined" placeholder.
static const char *ompt_sync_region_t_values[] = {
    "undefined",                  // 0
    "barrier",                    // 1
    "barrier_implicit",           // 2
    "barrier_explicit",           // 3
    "barrier_implementation",     // 4
    "taskwait",                   // 5
    "taskgroup",                  // 6
    "reduction",                  // 7
    "barrier_implicit_workshare", // 8
    "barrier_implicit_parallel",  // 9
    "barrier_teams",              // 10
};
128
129static void format_task_type(int type, char *buffer) {
130 char *progress = buffer;
131 if (type & ompt_task_initial)
132 progress += sprintf(progress, "ompt_task_initial");
133 if (type & ompt_task_implicit)
134 progress += sprintf(progress, "ompt_task_implicit");
135 if (type & ompt_task_explicit)
136 progress += sprintf(progress, "ompt_task_explicit");
137 if (type & ompt_task_target)
138 progress += sprintf(progress, "ompt_task_target");
139 if (type & ompt_task_taskwait)
140 progress += sprintf(progress, "ompt_task_taskwait");
141 if (type & ompt_task_importing)
142 progress += sprintf(progress, "|ompt_task_importing");
143 if (type & ompt_task_exporting)
144 progress += sprintf(progress, "|ompt_task_exporting");
145 if (type & ompt_task_undeferred)
146 progress += sprintf(progress, "|ompt_task_undeferred");
147 if (type & ompt_task_untied)
148 progress += sprintf(progress, "|ompt_task_untied");
149 if (type & ompt_task_final)
150 progress += sprintf(progress, "|ompt_task_final");
151 if (type & ompt_task_mergeable)
152 progress += sprintf(progress, "|ompt_task_mergeable");
153 if (type & ompt_task_merged)
154 progress += sprintf(progress, "|ompt_task_merged");
155}
156
157static ompt_set_callback_t ompt_set_callback;
158static ompt_get_callback_t ompt_get_callback;
159static ompt_get_state_t ompt_get_state;
160static ompt_get_task_info_t ompt_get_task_info;
161static ompt_get_task_memory_t ompt_get_task_memory;
162static ompt_get_thread_data_t ompt_get_thread_data;
163static ompt_get_parallel_info_t ompt_get_parallel_info;
164static ompt_get_unique_id_t ompt_get_unique_id;
165static ompt_finalize_tool_t ompt_finalize_tool;
166static ompt_get_num_procs_t ompt_get_num_procs;
167static ompt_get_num_places_t ompt_get_num_places;
168static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
169static ompt_get_place_num_t ompt_get_place_num;
170static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
171static ompt_get_proc_id_t ompt_get_proc_id;
172static ompt_enumerate_states_t ompt_enumerate_states;
173static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
174
175void assert_frame_flags(int enterf, int exitf) {
176 if (!(enterf == (ompt_frame_application | ompt_frame_cfa) ||
177 enterf == (ompt_frame_runtime | ompt_frame_cfa))) {
178 printf("enter_frame_flags (%i) is invalid\n", enterf);
179 fflush(NULL);
180 }
181 if (!(exitf == (ompt_frame_application | ompt_frame_cfa) ||
182 exitf == (ompt_frame_runtime | ompt_frame_cfa))) {
183 printf("exit_frame_flags (%i) is invalid\n", exitf);
184 fflush(NULL);
185 }
186 assert(enterf == (ompt_frame_application | ompt_frame_cfa) ||
187 enterf == (ompt_frame_runtime | ompt_frame_cfa));
188 assert(exitf == (ompt_frame_application | ompt_frame_cfa) ||
189 exitf == (ompt_frame_runtime | ompt_frame_cfa));
190}
191static void print_ids(int level) {
192 int task_type, thread_num;
193 ompt_frame_t *frame;
194 ompt_data_t *task_parallel_data;
195 ompt_data_t *task_data;
196 int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
197 &task_parallel_data, &thread_num);
198 char buffer[2048];
199 format_task_type(task_type, buffer);
200 if (frame) {
201 printf("%" PRIu64 ": task level %d: parallel_id=%" PRIx64
202 ", task_id=%" PRIx64 ", exit_frame=%p, reenter_frame=%p, "
203 "task_type=%s=%d, thread_num=%d\n",
205 exists_task ? task_parallel_data->value : 0,
206 exists_task ? task_data->value : 0, frame->exit_frame.ptr,
207 frame->enter_frame.ptr, buffer, task_type, thread_num);
208 assert_frame_flags(frame->enter_frame_flags, frame->exit_frame_flags);
209 }
210}
211
// Address of the stack frame `depth` levels above the current function
// (compiler builtin).
#define get_frame_address(depth) __builtin_frame_address(depth)

// Prints "<thread id>: __builtin_frame_address(<depth>)=<address>".
#define print_frame(depth)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value, depth, get_frame_address(depth))
217
218// clang (version 5.0 and above) adds an intermediate function call with debug
219// flag (-g)
#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
#if defined(__clang__) && __clang_major__ >= 5
// Clang >= 5: look one frame further up when DEBUG is defined.
#if defined(DEBUG)
#define print_frame_from_outlined_fn(level) print_frame(level + 1)
#else
#define print_frame_from_outlined_fn(level) print_frame(level)
#endif
#warning                                                                       \
    "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
#warning                                                                       \
    "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
#else
// Any other compiler: the outlined function's frame is used as-is.
#define print_frame_from_outlined_fn(level) print_frame(level)
#endif
#endif
234
235// This macro helps to define a label at the current position that can be used
236// to get the current address in the code.
237//
238// For print_current_address():
239// To reliably determine the offset between the address of the label and the
240// actual return address, we insert a NOP instruction as a jump target as the
241// compiler would otherwise insert an instruction that we can't control. The
242// instruction length is target dependent and is explained below.
243//
244// (The empty block between "#pragma omp ..." and the __asm__ statement is a
245// workaround for a bug in the Intel Compiler.)
// Expands to an empty block, a NOP instruction and the label
// ompt_label_<tag>, in that order. The statement following the macro use
// becomes the labelled statement.
#define define_ompt_label(tag)                                                 \
  {}                                                                           \
  __asm__("nop");                                                              \
  ompt_label_##tag:
251
252// This macro helps to get the address of a label that is inserted by the above
253// macro define_ompt_label(). The address is obtained with a GNU extension
254// (&&label) that has been tested with gcc, clang and icc.
#define get_ompt_label_address(tag) (&&ompt_label_##tag)
256
257// This macro prints the exact address that a previously called runtime function
258// returns to.
#define print_current_address(tag)                                             \
  define_ompt_label(tag)                                                       \
  print_possible_return_addresses(get_ompt_label_address(tag))
262
263#if KMP_ARCH_X86 || KMP_ARCH_X86_64
264// On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
265// a MOV instruction for non-void runtime functions which is 3 bytes long.
266#define print_possible_return_addresses(addr) \
267 printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
268 ompt_get_thread_data()->value, ((char *)addr) - 1, \
269 ((char *)addr) - 4)
270#elif KMP_ARCH_PPC64
271// On Power the NOP instruction is 4 bytes long. In addition, the compiler
272// inserts a second NOP instruction (another 4 bytes). For non-void runtime
273// functions Clang inserts a STW instruction (but only if compiling under
274// -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
275#define print_possible_return_addresses(addr) \
276 printf("%" PRIu64 ": current_address=%p or %p\n", \
277 ompt_get_thread_data()->value, ((char *)addr) - 8, \
278 ((char *)addr) - 12)
279#elif KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32
280// On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
281// store instruction (another 4 bytes long).
282// FIXME: PR #65696 addded a third possibility (12 byte offset) to make the
283// tests pass on Darwin. Adding the same for other OSes. However, the proper
284// fix for this is to remove the extra branch instruction being generated by
285// the AArch64 backend. See issue #69627.
286#define print_possible_return_addresses(addr) \
287 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
288 ompt_get_thread_data()->value, ((char *)addr) - 4, \
289 ((char *)addr) - 8, ((char *)addr) - 12)
290#elif KMP_ARCH_RISCV64
291#if __riscv_compressed
292// On RV64GC the C.NOP instruction is 2 byte long. In addition, the compiler
293// inserts a J instruction (targeting the successor basic block), which
294// accounts for another 4 bytes. Finally, an additional J instruction may
295// appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (ie.
296// another branch).
297#define print_possible_return_addresses(addr) \
298 printf("%" PRIu64 ": current_address=%p or %p\n", \
299 ompt_get_thread_data()->value, ((char *)addr) - 6, \
300 ((char *)addr) - 10)
301#else
302// On RV64G the NOP instruction is 4 byte long. In addition, the compiler
303// inserts a J instruction (targeting the successor basic block), which
304// accounts for another 4 bytes. Finally, an additional J instruction may
305// appear (adding 4 more bytes) when the NOP is referenced elsewhere (ie.
306// another branch).
307#define print_possible_return_addresses(addr) \
308 printf("%" PRIu64 ": current_address=%p or %p\n", \
309 ompt_get_thread_data()->value, ((char *)addr) - 8, \
310 ((char *)addr) - 12)
311#endif
312#elif KMP_ARCH_LOONGARCH64
313// On LoongArch64 the NOP instruction is 4 bytes long, can be followed by
314// inserted jump instruction (another 4 bytes long). And an additional jump
315// instruction may appear (adding 4 more bytes) when the NOP is referenced
316// elsewhere (ie. another branch).
317#define print_possible_return_addresses(addr) \
318 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
319 ompt_get_thread_data()->value, ((char *)addr) - 4, \
320 ((char *)addr) - 8, ((char *)addr) - 12)
321#elif KMP_ARCH_VE
322// On VE the NOP instruction is 8 byte long. In addition, the compiler inserts
323// a ??? instruction for non-void runtime functions which is ? bytes long.
324#define print_possible_return_addresses(addr) \
325 printf("%" PRIu64 ": current_address=%p or %p\n", \
326 ompt_get_thread_data()->value, ((char *)addr) - 8, \
327 ((char *)addr) - 8)
328#elif KMP_ARCH_S390X
329// On s390x the NOP instruction is 2 bytes long. For non-void runtime
330// functions Clang inserts a STY instruction (but only if compiling under
331// -fno-PIC which will be the default with Clang 8.0, another 6 bytes).
332//
333// Another possibility is:
334//
335// brasl %r14,__kmpc_end_master@plt
336// a7 f4 00 02 j 0f
337// 47 00 00 00 0: nop
338// a7 f4 00 02 j addr
339// addr:
340#define print_possible_return_addresses(addr) \
341 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
342 ompt_get_thread_data()->value, ((char *)addr) - 2, \
343 ((char *)addr) - 8, ((char *)addr) - 12)
344#elif KMP_ARCH_SPARC
345// FIXME: Need to distinguish between 32 and 64-bit SPARC?
346// On SPARC the NOP instruction is 4 bytes long.
347// FIXME: Explain. Can use __builtin_frob_return_addr?
348#define print_possible_return_addresses(addr) \
349 printf("%" PRIu64 ": current_address=%p or %p\n", \
350 ompt_get_thread_data()->value, ((char *)addr) - 12, \
351 (char *)addr - 20)
352#else
353#error Unsupported target architecture, cannot determine address offset!
354#endif
355
// This macro performs a job similar to print_current_address(), except that it
// discards a certain number of nibbles from the address and prints only the
// most significant bits / nibbles. This can be used for cases where the
// return address can only be approximated.
//
// To account for overflows (i.e. the most significant bits / nibbles have just
// changed because we are a few bytes above the relevant power of two), the
// addresses of the "current" and of the "previous block" are printed.
#define print_fuzzy_address(tag)                                               \
  define_ompt_label(tag) print_fuzzy_address_blocks(get_ompt_label_address(tag))
366
367// If you change this define you need to adapt all capture patterns in the tests
368// to include or discard the new number of nibbles!
// Number of low-order hex digits (nibbles) dropped from a fuzzy address, and
// the resulting block size in bytes (16^nibbles).
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))

// Prints the block number containing `addr` together with the previous block
// and the two following blocks, followed by the exact address.
// The argument is parenthesized and converted through uintptr_t so that
// expression arguments and targets with pointers narrower than 64 bits are
// handled correctly.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES - 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES,          \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 2,      \
         (addr))
379
// Registers the handler on_<cb> for OMPT callback <cb>. The handler is first
// stored in a variable of type `cb_type`, so a mismatching signature is
// diagnosed at compile time; a message is printed if the runtime answers
// ompt_set_never.
#define register_ompt_callback_t(cb, cb_type)                                  \
  do {                                                                         \
    cb_type f_##cb = &on_##cb;                                                 \
    if (ompt_set_callback(cb, (ompt_callback_t)f_##cb) == ompt_set_never)      \
      printf("0: Could not register callback '" #cb "'\n");                    \
  } while (0)

// Same as above, with the handler type derived from the callback name (<cb>_t).
#define register_ompt_callback(cb) register_ompt_callback_t(cb, cb##_t)
388
389#ifndef USE_PRIVATE_TOOL
390static void on_ompt_callback_mutex_acquire(ompt_mutex_t kind, unsigned int hint,
391 unsigned int impl,
392 ompt_wait_id_t wait_id,
393 const void *codeptr_ra) {
394 switch (kind) {
395 case ompt_mutex_lock:
396 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_wait_lock: wait_id=%" PRIu64
397 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
398 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
399 break;
400 case ompt_mutex_test_lock:
401 printf("%" PRIu64 ":" _TOOL_PREFIX
402 " ompt_event_wait_test_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
403 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
404 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
405 break;
406 case ompt_mutex_nest_lock:
407 printf("%" PRIu64 ":" _TOOL_PREFIX
408 " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
409 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
410 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
411 break;
412 case ompt_mutex_test_nest_lock:
413 printf("%" PRIu64 ":" _TOOL_PREFIX
414 " ompt_event_wait_test_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
415 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
416 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
417 break;
418 case ompt_mutex_critical:
419 printf("%" PRIu64 ":" _TOOL_PREFIX
420 " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
421 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
422 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
423 break;
424 case ompt_mutex_atomic:
425 printf("%" PRIu64 ":" _TOOL_PREFIX
426 " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
427 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
428 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
429 break;
430 case ompt_mutex_ordered:
431 printf("%" PRIu64 ":" _TOOL_PREFIX
432 " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
433 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
434 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
435 break;
436 default:
437 break;
438 }
439}
440
441static void on_ompt_callback_mutex_acquired(ompt_mutex_t kind,
442 ompt_wait_id_t wait_id,
443 const void *codeptr_ra) {
444 switch (kind) {
445 case ompt_mutex_lock:
446 printf("%" PRIu64 ":" _TOOL_PREFIX
447 " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
448 ompt_get_thread_data()->value, wait_id, codeptr_ra);
449 break;
450 case ompt_mutex_test_lock:
451 printf("%" PRIu64 ":" _TOOL_PREFIX
452 " ompt_event_acquired_test_lock: wait_id=%" PRIu64
453 ", codeptr_ra=%p \n",
454 ompt_get_thread_data()->value, wait_id, codeptr_ra);
455 break;
456 case ompt_mutex_nest_lock:
457 printf("%" PRIu64 ":" _TOOL_PREFIX
458 " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
459 ", codeptr_ra=%p \n",
460 ompt_get_thread_data()->value, wait_id, codeptr_ra);
461 break;
462 case ompt_mutex_test_nest_lock:
463 printf("%" PRIu64 ":" _TOOL_PREFIX
464 " ompt_event_acquired_test_nest_lock_first: wait_id=%" PRIu64
465 ", codeptr_ra=%p \n",
466 ompt_get_thread_data()->value, wait_id, codeptr_ra);
467 break;
468 case ompt_mutex_critical:
469 printf("%" PRIu64 ":" _TOOL_PREFIX
470 " ompt_event_acquired_critical: wait_id=%" PRIu64
471 ", codeptr_ra=%p \n",
472 ompt_get_thread_data()->value, wait_id, codeptr_ra);
473 break;
474 case ompt_mutex_atomic:
475 printf("%" PRIu64 ":" _TOOL_PREFIX
476 " ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
477 ompt_get_thread_data()->value, wait_id, codeptr_ra);
478 break;
479 case ompt_mutex_ordered:
480 printf("%" PRIu64 ":" _TOOL_PREFIX
481 " ompt_event_acquired_ordered: wait_id=%" PRIu64
482 ", codeptr_ra=%p \n",
483 ompt_get_thread_data()->value, wait_id, codeptr_ra);
484 break;
485 default:
486 break;
487 }
488}
489
490static void on_ompt_callback_mutex_released(ompt_mutex_t kind,
491 ompt_wait_id_t wait_id,
492 const void *codeptr_ra) {
493 switch (kind) {
494 case ompt_mutex_lock:
495 printf("%" PRIu64 ":" _TOOL_PREFIX
496 " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
497 ompt_get_thread_data()->value, wait_id, codeptr_ra);
498 break;
499 case ompt_mutex_nest_lock:
500 printf("%" PRIu64 ":" _TOOL_PREFIX
501 " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
502 ", codeptr_ra=%p \n",
503 ompt_get_thread_data()->value, wait_id, codeptr_ra);
504 break;
505 case ompt_mutex_critical:
506 printf("%" PRIu64 ":" _TOOL_PREFIX
507 " ompt_event_release_critical: wait_id=%" PRIu64
508 ", codeptr_ra=%p \n",
509 ompt_get_thread_data()->value, wait_id, codeptr_ra);
510 break;
511 case ompt_mutex_atomic:
512 printf("%" PRIu64 ":" _TOOL_PREFIX
513 " ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
514 ompt_get_thread_data()->value, wait_id, codeptr_ra);
515 break;
516 case ompt_mutex_ordered:
517 printf("%" PRIu64 ":" _TOOL_PREFIX
518 " ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
519 ompt_get_thread_data()->value, wait_id, codeptr_ra);
520 break;
521 default:
522 break;
523 }
524}
525
526static void on_ompt_callback_nest_lock(ompt_scope_endpoint_t endpoint,
527 ompt_wait_id_t wait_id,
528 const void *codeptr_ra) {
529 switch (endpoint) {
530 case ompt_scope_begin:
531 printf("%" PRIu64 ":" _TOOL_PREFIX
532 " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
533 ", codeptr_ra=%p \n",
534 ompt_get_thread_data()->value, wait_id, codeptr_ra);
535 break;
536 case ompt_scope_end:
537 printf("%" PRIu64 ":" _TOOL_PREFIX
538 " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
539 ", codeptr_ra=%p \n",
540 ompt_get_thread_data()->value, wait_id, codeptr_ra);
541 break;
542 case ompt_scope_beginend:
543 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
544 exit(-1);
545 }
546}
547
548static void on_ompt_callback_sync_region(ompt_sync_region_t kind,
549 ompt_scope_endpoint_t endpoint,
550 ompt_data_t *parallel_data,
551 ompt_data_t *task_data,
552 const void *codeptr_ra) {
553 if (endpoint == ompt_scope_beginend) {
554 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
555 exit(-1);
556 }
557 if (kind == ompt_sync_region_reduction) {
558 printf("ompt_sync_region_reduction should never be passed to %s\n",
559 __func__);
560 exit(-1);
561 }
562 uint64_t parallel_data_value = parallel_data ? parallel_data->value : 0;
563 const char *begin_or_end = (endpoint == ompt_scope_begin) ? "begin" : "end";
564 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_%s: parallel_id=%" PRIx64
565 ", task_id=%" PRIx64 ", codeptr_ra=%p\n",
567 begin_or_end, parallel_data_value, task_data->value, codeptr_ra);
568 switch (kind) {
569 case ompt_sync_region_barrier:
570 case ompt_sync_region_barrier_implicit:
571 case ompt_sync_region_barrier_implicit_workshare:
572 case ompt_sync_region_barrier_implicit_parallel:
573 case ompt_sync_region_barrier_teams:
574 case ompt_sync_region_barrier_explicit:
575 case ompt_sync_region_barrier_implementation:
576 if (endpoint == ompt_scope_begin)
577 print_ids(0);
578 default:;
579 }
580}
581
582static void on_ompt_callback_sync_region_wait(ompt_sync_region_t kind,
583 ompt_scope_endpoint_t endpoint,
584 ompt_data_t *parallel_data,
585 ompt_data_t *task_data,
586 const void *codeptr_ra) {
587 if (endpoint == ompt_scope_beginend) {
588 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
589 exit(-1);
590 }
591 if (kind == ompt_sync_region_reduction) {
592 printf("ompt_sync_region_reduction should never be passed to %s\n",
593 __func__);
594 exit(-1);
595 }
596 uint64_t parallel_data_value = parallel_data ? parallel_data->value : 0;
597 const char *begin_or_end = (endpoint == ompt_scope_begin) ? "begin" : "end";
598 printf("%" PRIu64 ":" _TOOL_PREFIX
599 " ompt_event_wait_%s_%s: parallel_id=%" PRIx64 ", task_id=%" PRIx64
600 ", codeptr_ra=%p\n",
602 begin_or_end, parallel_data_value, task_data->value, codeptr_ra);
603}
604
605static void on_ompt_callback_reduction(ompt_sync_region_t kind,
606 ompt_scope_endpoint_t endpoint,
607 ompt_data_t *parallel_data,
608 ompt_data_t *task_data,
609 const void *codeptr_ra) {
610 switch (endpoint) {
611 case ompt_scope_begin:
612 printf("%" PRIu64 ":" _TOOL_PREFIX
613 " ompt_event_reduction_begin: parallel_id=%" PRIx64
614 ", task_id=%" PRIx64 ", codeptr_ra=%p\n",
616 (parallel_data) ? parallel_data->value : 0, task_data->value,
617 codeptr_ra);
618 break;
619 case ompt_scope_end:
620 printf("%" PRIu64 ":" _TOOL_PREFIX
621 " ompt_event_reduction_end: parallel_id=%" PRIx64
622 ", task_id=%" PRIx64 ", codeptr_ra=%p\n",
624 (parallel_data) ? parallel_data->value : 0, task_data->value,
625 codeptr_ra);
626 break;
627 case ompt_scope_beginend:
628 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
629 exit(-1);
630 }
631}
632
633static void on_ompt_callback_flush(ompt_data_t *thread_data,
634 const void *codeptr_ra) {
635 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
636 thread_data->value, codeptr_ra);
637}
638
639static void on_ompt_callback_cancel(ompt_data_t *task_data, int flags,
640 const void *codeptr_ra) {
641 const char *first_flag_value;
642 const char *second_flag_value;
643 if (flags & ompt_cancel_parallel)
644 first_flag_value = ompt_cancel_flag_t_values[0];
645 else if (flags & ompt_cancel_sections)
646 first_flag_value = ompt_cancel_flag_t_values[1];
647 else if (flags & ompt_cancel_loop)
648 first_flag_value = ompt_cancel_flag_t_values[2];
649 else if (flags & ompt_cancel_taskgroup)
650 first_flag_value = ompt_cancel_flag_t_values[3];
651
652 if (flags & ompt_cancel_activated)
653 second_flag_value = ompt_cancel_flag_t_values[4];
654 else if (flags & ompt_cancel_detected)
655 second_flag_value = ompt_cancel_flag_t_values[5];
656 else if (flags & ompt_cancel_discarded_task)
657 second_flag_value = ompt_cancel_flag_t_values[6];
658
659 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_id=%" PRIx64
660 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
661 ompt_get_thread_data()->value, task_data->value, first_flag_value,
662 second_flag_value, flags, codeptr_ra);
663}
664
665static void on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,
666 ompt_data_t *parallel_data,
667 ompt_data_t *task_data,
668 unsigned int team_size,
669 unsigned int thread_num, int flags) {
670 switch (endpoint) {
671 case ompt_scope_begin:
672 if (task_data->ptr)
673 printf("%s\n", "0: task_data initially not null");
674 task_data->value = ompt_get_unique_id();
675
676 // there is no parallel_begin callback for implicit parallel region
677 // thus it is initialized in initial task
678 if (flags & ompt_task_initial) {
679 char buffer[2048];
680
681 format_task_type(flags, buffer);
682 // Only check initial task not created by teams construct
683 if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
684 printf("%s\n", "0: parallel_data initially not null");
685 parallel_data->value = ompt_get_unique_id();
686 printf("%" PRIu64 ":" _TOOL_PREFIX
687 " ompt_event_initial_task_begin: parallel_id=%" PRIx64
688 ", task_id=%" PRIx64 ", actual_parallelism=%" PRIu32
689 ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
690 ompt_get_thread_data()->value, parallel_data->value,
691 task_data->value, team_size, thread_num, flags);
692 } else {
693 printf("%" PRIu64 ":" _TOOL_PREFIX
694 " ompt_event_implicit_task_begin: parallel_id=%" PRIx64
695 ", task_id=%" PRIx64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32
696 "\n",
697 ompt_get_thread_data()->value, parallel_data->value,
698 task_data->value, team_size, thread_num);
699 }
700
701 break;
702 case ompt_scope_end:
703 if (flags & ompt_task_initial) {
704 printf("%" PRIu64 ":" _TOOL_PREFIX
705 " ompt_event_initial_task_end: parallel_id=%" PRIx64
706 ", task_id=%" PRIx64 ", actual_parallelism=%" PRIu32
707 ", index=%" PRIu32 "\n",
709 (parallel_data) ? parallel_data->value : 0, task_data->value,
710 team_size, thread_num);
711 } else {
712 printf("%" PRIu64 ":" _TOOL_PREFIX
713 " ompt_event_implicit_task_end: parallel_id=%" PRIx64
714 ", task_id=%" PRIx64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32
715 "\n",
717 (parallel_data) ? parallel_data->value : 0, task_data->value,
718 team_size, thread_num);
719 }
720 break;
721 case ompt_scope_beginend:
722 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
723 exit(-1);
724 }
725}
726
727static void on_ompt_callback_lock_init(ompt_mutex_t kind, unsigned int hint,
728 unsigned int impl,
729 ompt_wait_id_t wait_id,
730 const void *codeptr_ra) {
731 switch (kind) {
732 case ompt_mutex_lock:
733 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_init_lock: wait_id=%" PRIu64
734 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
735 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
736 break;
737 case ompt_mutex_nest_lock:
738 printf("%" PRIu64 ":" _TOOL_PREFIX
739 " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
740 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
741 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
742 break;
743 default:
744 break;
745 }
746}
747
748static void on_ompt_callback_lock_destroy(ompt_mutex_t kind,
749 ompt_wait_id_t wait_id,
750 const void *codeptr_ra) {
751 switch (kind) {
752 case ompt_mutex_lock:
753 printf("%" PRIu64 ":" _TOOL_PREFIX
754 " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
755 ompt_get_thread_data()->value, wait_id, codeptr_ra);
756 break;
757 case ompt_mutex_nest_lock:
758 printf("%" PRIu64 ":" _TOOL_PREFIX
759 " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
760 ", codeptr_ra=%p \n",
761 ompt_get_thread_data()->value, wait_id, codeptr_ra);
762 break;
763 default:
764 break;
765 }
766}
767
768static void on_ompt_callback_work(ompt_work_t wstype,
769 ompt_scope_endpoint_t endpoint,
770 ompt_data_t *parallel_data,
771 ompt_data_t *task_data, uint64_t count,
772 const void *codeptr_ra) {
773 switch (endpoint) {
774 case ompt_scope_begin:
775 printf("%" PRIu64 ":" _TOOL_PREFIX " %s_begin: parallel_id=%" PRIx64
776 ", task_id=%" PRIx64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
778 parallel_data->value, task_data->value, codeptr_ra, count);
779 break;
780 case ompt_scope_end:
781 printf("%" PRIu64 ":" _TOOL_PREFIX " %s_end: parallel_id=%" PRIx64
782 ", task_id=%" PRIx64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
784 parallel_data->value, task_data->value, codeptr_ra, count);
785 break;
786 case ompt_scope_beginend:
787 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
788 exit(-1);
789 }
790}
791
792static void on_ompt_callback_dispatch(ompt_data_t *parallel_data,
793 ompt_data_t *task_data,
794 ompt_dispatch_t kind,
795 ompt_data_t instance) {
796 char *event_name = NULL;
797 void *codeptr_ra = NULL;
798 ompt_dispatch_chunk_t *dispatch_chunk = NULL;
799 switch (kind) {
800 case ompt_dispatch_section:
801 event_name = "ompt_event_section_begin";
802 codeptr_ra = instance.ptr;
803 break;
804 case ompt_dispatch_ws_loop_chunk:
805 event_name = "ompt_event_ws_loop_chunk_begin";
806 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
807 break;
808 case ompt_dispatch_taskloop_chunk:
809 event_name = "ompt_event_taskloop_chunk_begin";
810 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
811 break;
812 case ompt_dispatch_distribute_chunk:
813 event_name = "ompt_event_distribute_chunk_begin";
814 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
815 break;
816 default:
817 event_name = "ompt_ws_loop_iteration_begin";
818 }
819 printf("%" PRIu64 ":" _TOOL_PREFIX " %s: parallel_id=%" PRIx64
820 ", task_id=%" PRIx64 ", codeptr_ra=%p, chunk_start=%" PRIu64
821 ", chunk_iterations=%" PRIu64 "\n",
822 ompt_get_thread_data()->value, event_name, parallel_data->value,
823 task_data->value, codeptr_ra,
824 dispatch_chunk ? dispatch_chunk->start : 0,
825 dispatch_chunk ? dispatch_chunk->iterations : 0);
826}
827
828static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
829 ompt_data_t *parallel_data,
830 ompt_data_t *task_data,
831 const void *codeptr_ra) {
832 switch (endpoint) {
833 case ompt_scope_begin:
834 printf("%" PRIu64 ":" _TOOL_PREFIX
835 " ompt_event_masked_begin: parallel_id=%" PRIx64 ", task_id=%" PRIx64
836 ", codeptr_ra=%p\n",
837 ompt_get_thread_data()->value, parallel_data->value,
838 task_data->value, codeptr_ra);
839 break;
840 case ompt_scope_end:
841 printf("%" PRIu64 ":" _TOOL_PREFIX
842 " ompt_event_masked_end: parallel_id=%" PRIx64 ", task_id=%" PRIx64
843 ", codeptr_ra=%p\n",
844 ompt_get_thread_data()->value, parallel_data->value,
845 task_data->value, codeptr_ra);
846 break;
847 case ompt_scope_beginend:
848 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
849 exit(-1);
850 }
851}
852
854 ompt_data_t *encountering_task_data,
855 const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
856 uint32_t requested_team_size, int flag, const void *codeptr_ra) {
857 if (parallel_data->ptr)
858 printf("0: parallel_data initially not null\n");
859 parallel_data->value = ompt_get_unique_id();
860 int invoker = flag & 0xF;
861 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
862 const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams";
863 printf("%" PRIu64 ":" _TOOL_PREFIX
864 " ompt_event_%s_begin: parent_task_id=%" PRIx64
865 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
866 "parallel_id=%" PRIx64 ", requested_%s=%" PRIu32
867 ", codeptr_ra=%p, invoker=%d\n",
868 ompt_get_thread_data()->value, event, encountering_task_data->value,
869 encountering_task_frame->exit_frame.ptr,
870 encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
871 requested_team_size, codeptr_ra, invoker);
872}
873
874static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
875 ompt_data_t *encountering_task_data,
876 int flag, const void *codeptr_ra) {
877 int invoker = flag & 0xF;
878 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
879 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: parallel_id=%" PRIx64
880 ", task_id=%" PRIx64 ", invoker=%d, codeptr_ra=%p\n",
881 ompt_get_thread_data()->value, event, parallel_data->value,
882 encountering_task_data->value, invoker, codeptr_ra);
883}
884
885static void
886on_ompt_callback_task_create(ompt_data_t *encountering_task_data,
887 const ompt_frame_t *encountering_task_frame,
888 ompt_data_t *new_task_data, int type,
889 int has_dependences, const void *codeptr_ra) {
890 if (new_task_data->ptr)
891 printf("0: new_task_data initially not null\n");
892 new_task_data->value = ompt_get_unique_id();
893 char buffer[2048];
894
895 format_task_type(type, buffer);
896
897 printf(
898 "%" PRIu64 ":" _TOOL_PREFIX
899 " ompt_event_task_create: parent_task_id=%" PRIx64
900 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
901 "new_task_id=%" PRIx64
902 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
904 encountering_task_data ? encountering_task_data->value : 0,
905 encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
906 encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
907 new_task_data->value, codeptr_ra, buffer, type,
908 has_dependences ? "yes" : "no");
909}
910
911static void on_ompt_callback_task_schedule(ompt_data_t *first_task_data,
912 ompt_task_status_t prior_task_status,
913 ompt_data_t *second_task_data) {
914 printf("%" PRIu64 ":" _TOOL_PREFIX
915 " ompt_event_task_schedule: first_task_id=%" PRIx64
916 ", second_task_id=%" PRIx64 ", prior_task_status=%s=%d\n",
917 ompt_get_thread_data()->value, first_task_data->value,
918 (second_task_data ? second_task_data->value : -1),
919 ompt_task_status_t_values[prior_task_status], prior_task_status);
920 if (prior_task_status == ompt_task_complete ||
921 prior_task_status == ompt_task_late_fulfill ||
922 prior_task_status == ompt_taskwait_complete) {
923 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIx64
924 "\n",
925 ompt_get_thread_data()->value, first_task_data->value);
926 }
927}
928
929static void on_ompt_callback_dependences(ompt_data_t *task_data,
930 const ompt_dependence_t *deps,
931 int ndeps) {
932 char buffer[2048];
933 char *progress = buffer;
934 int i;
935 for (i = 0; i < ndeps && progress < buffer + 2000; i++) {
936 if (deps[i].dependence_type == ompt_dependence_type_source ||
937 deps[i].dependence_type == ompt_dependence_type_sink)
938 progress +=
939 sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
940 ompt_dependence_type_t_values[deps[i].dependence_type]);
941 else
942 progress +=
943 sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
944 ompt_dependence_type_t_values[deps[i].dependence_type]);
945 }
946 if (ndeps > 0)
947 progress[-2] = 0;
948 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIx64
949 ", deps=[%s], ndeps=%d\n",
950 ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
951}
952
953static void on_ompt_callback_task_dependence(ompt_data_t *first_task_data,
954 ompt_data_t *second_task_data) {
955 printf("%" PRIu64 ":" _TOOL_PREFIX
956 " ompt_event_task_dependence_pair: first_task_id=%" PRIx64
957 ", second_task_id=%" PRIx64 "\n",
958 ompt_get_thread_data()->value, first_task_data->value,
959 second_task_data->value);
960}
961
962static void on_ompt_callback_thread_begin(ompt_thread_t thread_type,
963 ompt_data_t *thread_data) {
964 if (thread_data->ptr)
965 printf("%s\n", "0: thread_data initially not null");
966 thread_data->value = ompt_get_unique_id();
967 printf("%" PRIu64 ":" _TOOL_PREFIX
968 " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
970 thread_type, thread_data->value);
971}
972
973static void on_ompt_callback_thread_end(ompt_data_t *thread_data) {
974 printf("%" PRIu64 ":" _TOOL_PREFIX
975 " ompt_event_thread_end: thread_id=%" PRIu64 "\n",
976 ompt_get_thread_data()->value, thread_data->value);
977}
978
979static int on_ompt_callback_control_tool(uint64_t command, uint64_t modifier,
980 void *arg, const void *codeptr_ra) {
981 ompt_frame_t *omptTaskFrame;
982 ompt_get_task_info(0, NULL, (ompt_data_t **)NULL, &omptTaskFrame, NULL, NULL);
983 printf("%" PRIu64 ":" _TOOL_PREFIX
984 " ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64
985 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
986 "current_task_frame.reenter=%p \n",
987 ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
988 omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
989
990 // the following would interfere with expected output for OMPT tests, so skip
991#ifndef _OMPT_TESTS
992 // print task data
993 int task_level = 0;
994 ompt_data_t *task_data;
995 while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
996 NULL, NULL)) {
997 printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIx64 "\n",
998 ompt_get_thread_data()->value, task_level, task_data->value);
999 task_level++;
1000 }
1001
1002 // print parallel data
1003 int parallel_level = 0;
1004 ompt_data_t *parallel_data;
1005 while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)&parallel_data,
1006 NULL)) {
1007 printf("%" PRIu64 ":" _TOOL_PREFIX
1008 " parallel level %d: parallel_id=%" PRIx64 "\n",
1009 ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1010 parallel_level++;
1011 }
1012#endif
1013 return 0; // success
1014}
1015
1016static void on_ompt_callback_error(ompt_severity_t severity,
1017 const char *message, size_t length,
1018 const void *codeptr_ra) {
1019 printf("%" PRIu64 ": ompt_event_runtime_error: severity=%" PRIu32
1020 ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n",
1021 ompt_get_thread_data()->value, severity, message, (uint64_t)length,
1022 codeptr_ra);
1023}
1024
1025int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num,
1026 ompt_data_t *tool_data) {
1027 ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
1028 ompt_get_callback = (ompt_get_callback_t)lookup("ompt_get_callback");
1029 ompt_get_state = (ompt_get_state_t)lookup("ompt_get_state");
1030 ompt_get_task_info = (ompt_get_task_info_t)lookup("ompt_get_task_info");
1031 ompt_get_task_memory = (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
1032 ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
1034 (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
1035 ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
1036 ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");
1037
1039
1040 ompt_get_num_procs = (ompt_get_num_procs_t)lookup("ompt_get_num_procs");
1041 ompt_get_num_places = (ompt_get_num_places_t)lookup("ompt_get_num_places");
1043 (ompt_get_place_proc_ids_t)lookup("ompt_get_place_proc_ids");
1044 ompt_get_place_num = (ompt_get_place_num_t)lookup("ompt_get_place_num");
1046 (ompt_get_partition_place_nums_t)lookup("ompt_get_partition_place_nums");
1047 ompt_get_proc_id = (ompt_get_proc_id_t)lookup("ompt_get_proc_id");
1049 (ompt_enumerate_states_t)lookup("ompt_enumerate_states");
1051 (ompt_enumerate_mutex_impls_t)lookup("ompt_enumerate_mutex_impls");
1052
1053 register_ompt_callback(ompt_callback_mutex_acquire);
1054 register_ompt_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
1055 register_ompt_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
1056 register_ompt_callback(ompt_callback_nest_lock);
1057 register_ompt_callback(ompt_callback_sync_region);
1058 register_ompt_callback_t(ompt_callback_sync_region_wait,
1059 ompt_callback_sync_region_t);
1060 register_ompt_callback_t(ompt_callback_reduction,
1061 ompt_callback_sync_region_t);
1062#ifndef _OMPT_DISABLE_CONTROL_TOOL
1063 register_ompt_callback(ompt_callback_control_tool);
1064#endif
1065 register_ompt_callback(ompt_callback_flush);
1066 register_ompt_callback(ompt_callback_cancel);
1067 register_ompt_callback(ompt_callback_implicit_task);
1068 register_ompt_callback_t(ompt_callback_lock_init,
1069 ompt_callback_mutex_acquire_t);
1070 register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
1071 register_ompt_callback(ompt_callback_work);
1072 register_ompt_callback(ompt_callback_dispatch);
1073 register_ompt_callback(ompt_callback_masked);
1074 register_ompt_callback(ompt_callback_parallel_begin);
1075 register_ompt_callback(ompt_callback_parallel_end);
1076 register_ompt_callback(ompt_callback_task_create);
1077 register_ompt_callback(ompt_callback_task_schedule);
1078 register_ompt_callback(ompt_callback_dependences);
1079 register_ompt_callback(ompt_callback_task_dependence);
1080 register_ompt_callback(ompt_callback_thread_begin);
1081 register_ompt_callback(ompt_callback_thread_end);
1082 register_ompt_callback(ompt_callback_error);
1083 printf("0: NULL_POINTER=%p\n", (void *)NULL);
1084 return 1; // success
1085}
1086
1087void ompt_finalize(ompt_data_t *tool_data) {
1088 printf("0: ompt_event_runtime_shutdown\n");
1089}
1090
1091#ifdef __cplusplus
1092extern "C" {
1093#endif
1095 const char *runtime_version) {
1097 &ompt_finalize, 0};
1098 return &ompt_start_tool_result;
1099}
1100#ifdef __cplusplus
1101}
1102#endif
1103#endif // ifndef USE_PRIVATE_TOOL
1104#ifdef _OMPT_TESTS
1105#undef _OMPT_TESTS
1106#endif
static void on_ompt_callback_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition callback.h:490
static void on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, unsigned int team_size, unsigned int thread_num, int flags)
Definition callback.h:665
static const char * ompt_thread_t_values[]
Definition callback.h:27
static void print_ids(int level)
Definition callback.h:191
static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data, ompt_data_t *encountering_task_data, int flag, const void *codeptr_ra)
Definition callback.h:874
static void on_ompt_callback_flush(ompt_data_t *thread_data, const void *codeptr_ra)
Definition callback.h:633
static ompt_get_task_info_t ompt_get_task_info
Definition callback.h:160
static ompt_enumerate_states_t ompt_enumerate_states
Definition callback.h:172
static const char * ompt_work_events_t_values[]
Definition callback.h:63
static void on_ompt_callback_lock_destroy(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition callback.h:748
static void on_ompt_callback_task_schedule(ompt_data_t *first_task_data, ompt_task_status_t prior_task_status, ompt_data_t *second_task_data)
Definition callback.h:911
static ompt_get_callback_t ompt_get_callback
Definition callback.h:158
static void format_task_type(int type, char *buffer)
Definition callback.h:129
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids
Definition callback.h:168
static void on_ompt_callback_lock_init(ompt_mutex_t kind, unsigned int hint, unsigned int impl, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition callback.h:727
static ompt_set_callback_t ompt_set_callback
Definition callback.h:157
static ompt_get_place_num_t ompt_get_place_num
Definition callback.h:169
#define register_ompt_callback(name)
Definition callback.h:387
static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
Definition callback.h:828
static void on_ompt_callback_dependences(ompt_data_t *task_data, const ompt_dependence_t *deps, int ndeps)
Definition callback.h:929
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums
Definition callback.h:170
static void on_ompt_callback_task_create(ompt_data_t *encountering_task_data, const ompt_frame_t *encountering_task_frame, ompt_data_t *new_task_data, int type, int has_dependences, const void *codeptr_ra)
Definition callback.h:886
static void on_ompt_callback_error(ompt_severity_t severity, const char *message, size_t length, const void *codeptr_ra)
Definition callback.h:1016
static void on_ompt_callback_nest_lock(ompt_scope_endpoint_t endpoint, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition callback.h:526
int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t *tool_data)
Definition callback.h:1025
static void on_ompt_callback_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition callback.h:441
static const char * ompt_dependence_type_t_values[36]
Definition callback.h:78
static ompt_get_task_memory_t ompt_get_task_memory
Definition callback.h:161
void assert_frame_flags(int enterf, int exitf)
Definition callback.h:175
static int on_ompt_callback_control_tool(uint64_t command, uint64_t modifier, void *arg, const void *codeptr_ra)
Definition callback.h:979
static const char * ompt_task_status_t_values[]
Definition callback.h:31
void ompt_finalize(ompt_data_t *tool_data)
Definition callback.h:1087
static void on_ompt_callback_task_dependence(ompt_data_t *first_task_data, ompt_data_t *second_task_data)
Definition callback.h:953
static void on_ompt_callback_mutex_acquire(ompt_mutex_t kind, unsigned int hint, unsigned int impl, ompt_wait_id_t wait_id, const void *codeptr_ra)
Definition callback.h:390
#define register_ompt_callback_t(name, type)
Definition callback.h:380
static void on_ompt_callback_parallel_begin(ompt_data_t *encountering_task_data, const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, uint32_t requested_team_size, int flag, const void *codeptr_ra)
Definition callback.h:853
static ompt_get_num_procs_t ompt_get_num_procs
Definition callback.h:166
static void on_ompt_callback_thread_end(ompt_data_t *thread_data)
Definition callback.h:973
static void on_ompt_callback_sync_region(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
Definition callback.h:548
static void on_ompt_callback_work(ompt_work_t wstype, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, uint64_t count, const void *codeptr_ra)
Definition callback.h:768
static ompt_finalize_tool_t ompt_finalize_tool
Definition callback.h:165
static ompt_get_num_places_t ompt_get_num_places
Definition callback.h:167
static ompt_get_unique_id_t ompt_get_unique_id
Definition callback.h:164
static ompt_get_state_t ompt_get_state
Definition callback.h:159
static const char * ompt_cancel_flag_t_values[]
Definition callback.h:42
#define _TOOL_PREFIX
Definition callback.h:22
static void on_ompt_callback_thread_begin(ompt_thread_t thread_type, ompt_data_t *thread_data)
Definition callback.h:962
static void on_ompt_callback_sync_region_wait(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
Definition callback.h:582
static void on_ompt_callback_dispatch(ompt_data_t *parallel_data, ompt_data_t *task_data, ompt_dispatch_t kind, ompt_data_t instance)
Definition callback.h:792
static const char * ompt_work_t_values[]
Definition callback.h:48
static const char * ompt_sync_region_t_values[]
Definition callback.h:117
static void on_ompt_callback_reduction(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
Definition callback.h:605
static ompt_get_parallel_info_t ompt_get_parallel_info
Definition callback.h:163
static void on_ompt_callback_cancel(ompt_data_t *task_data, int flags, const void *codeptr_ra)
Definition callback.h:639
static ompt_get_proc_id_t ompt_get_proc_id
Definition callback.h:171
static ompt_get_thread_data_t ompt_get_thread_data
Definition callback.h:162
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls
Definition callback.h:173
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance * instance
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t length
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t 
__itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s const char ITT_FORMAT s __itt_frame frame
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
#define i
Definition kmp_stub.cpp:87
struct ompt_start_tool_result_t ompt_start_tool_result_t
static ompt_start_tool_result_t * ompt_start_tool_result
#define ompt_start_tool
volatile int flag