/* Compiles out the per-thread distribution check in test(), which is guarded
   by `#ifndef NO_EFFICIENCY_CHECK`. */
#define NO_EFFICIENCY_CHECK
/*
 * Allocate uninitialized storage for bound1 * bound2 elements of type
 * unsigned.
 *
 * Returns the new buffer (release it with Free()), or NULL when the
 * allocation fails or the requested byte count does not fit in size_t.
 */
unsigned *Alloc(unsigned bound1, unsigned bound2) {
  const size_t rows = bound1;
  const size_t cols = bound2;

  /* Do the size arithmetic in size_t and reject products that would wrap;
     multiplying in `unsigned` could silently under-allocate. */
  if (cols != 0 && rows > (size_t)-1 / sizeof(unsigned) / cols) {
    return NULL;
  }
  return malloc(rows * cols * sizeof(unsigned));
}
/*
 * Set the first bound1 * bound2 elements of p to zero.
 *
 * p may be NULL (for example after a failed allocation); in that case the
 * call does nothing.
 */
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) {
  if (p == NULL) {
    return;
  }
  /* Widen before multiplying so the byte count does not wrap at the width
     of `unsigned`. */
  memset(p, 0, (size_t)bound1 * bound2 * sizeof(unsigned));
}
/* Release a buffer with free(); passing NULL is a no-op. */
void Free(unsigned *p) { free(p); }
/*
 * Return the address of element (i, j) in a flattened two-dimensional array
 * whose rows are bound2 elements long.
 *
 * No bounds checking is performed; the caller must keep i and j in range.
 */
unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) {
  /* Compute the offset in size_t so it does not wrap at the width of
     `unsigned`. */
  const size_t offset = (size_t)i * bound2 + j;
  return &p[offset];
}
// Checks an OpenMP `collapse(2)` worksharing loop over the lower-triangular
// iteration space { (i, j) : 0 <= j < i < upper_bound }.
//
// Visible steps: compute the expected iteration count and the per-thread
// chunk bounds, run the collapsed loop nest in a parallel region, then scan
// the valid (j < i) and invalid (j >= i) index pairs and report errors on
// stderr; "PASSED" is printed to stderr at the end.
//
// NOTE(review): this listing is incomplete -- the embedded line numbers skip
// (e.g. 40 -> 50, 62 -> 70), so declarations, loop bodies, closing braces and
// the return statements are not visible here. Confirm against the full file.
34int test(
unsigned upper_bound) {
// Number of (i, j) pairs with j < i < upper_bound.
// NOTE(review): the product is evaluated in `unsigned` and wraps for large
// upper_bound.
36 unsigned total_iterations = upper_bound * (upper_bound - 1) / 2;
// Floor and ceiling of total_iterations / num_threads: the smallest and
// largest per-thread share of an even split.
38 unsigned lower_per_chunk = total_iterations / num_threads;
39 unsigned upper_per_chunk =
40 lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0);
50 "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] "
52 "loop type lower triangle <,< - ",
53 num_threads, upper_bound, total_iterations, lower_per_chunk,
// Parallel region; both bookkeeping arrays are shared between threads.
57#pragma omp parallel shared(iterations_per_thread, execution_count)
// The triangular nest under test: the inner bound depends on the outer index.
60#pragma omp for schedule(static) collapse(2)
61 for (
i = 0;
i < upper_bound;
i++) {
62 for (
j = 0;
j <
i;
j++) {
// Verification pass over the same index ranges as the parallel loop.
70 for (
i = 0;
i < upper_bound;
i++) {
// Pairs with j < i are the valid iterations.
71 for (
j = 0;
j <
i;
j++) {
76 fprintf(stderr,
"ERROR: valid iteration [%i,%i] executed %i times.\n",
// Pairs with j >= i lie outside the iteration space.
81 for (
j =
i;
j < upper_bound;
j++) {
86 fprintf(stderr,
"ERROR: invalid iteration [%i,%i] executed %i times.\n",
// Per-thread load check, compiled only when NO_EFFICIENCY_CHECK is undefined:
// each thread's `value` must lie in [lower_per_chunk, upper_per_chunk].
93#ifndef NO_EFFICIENCY_CHECK
95 for (
i = 0;
i < num_threads;
i++) {
97 if (value < lower_per_chunk || value > upper_per_chunk) {
99 "ERROR: Inefficient Collapse thread %d of %d assigned %i "
100 "iterations; must be between %d and %d\n",
101 i, num_threads,
value, lower_per_chunk, upper_per_chunk);
107 fprintf(stderr,
"PASSED\r\n");
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync p
#define omp_set_num_threads
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2)
unsigned * iterations_per_thread
unsigned * Index(unsigned *p, unsigned i, unsigned j, unsigned bound2)
unsigned * execution_count
int test(unsigned upper_bound)
unsigned * Alloc(unsigned bound1, unsigned bound2)
int omp_get_max_threads()