LLVM OpenMP 19.0.0git
omp_for_collapse_LowerTriangularLess.c
Go to the documentation of this file.
1// RUN: %libomp-compile-and-run
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5#include "omp.h"
6
/* Exclusive upper limit of the outer-loop bounds that main() feeds to test().
   Can be overridden on the compiler command line. */
#ifndef MAX_BOUND
#define MAX_BOUND 64
#endif
/* The per-thread load-balance check in test() is compiled only when
   NO_EFFICIENCY_CHECK is undefined, which here means only when _MSC_VER is
   defined. */
#ifndef _MSC_VER
#define NO_EFFICIENCY_CHECK
#endif

/* Correctness bookkeeping: counts how many times each (i, j) iteration was
   executed. Valid iterations must run exactly once, invalid ones never. */
unsigned *execution_count = NULL;
/* Number of iterations executed by each thread, indexed by thread number. */
unsigned *iterations_per_thread = NULL;
19
/* Allocates an uninitialized bound1 x bound2 matrix of unsigned counters.
   Returns NULL when the byte size does not fit in size_t or when malloc
   fails; malloc's result for a zero-sized request is passed through.
   The caller releases the storage with free(). */
unsigned *Alloc(unsigned bound1, unsigned bound2) {
  /* Largest element count whose byte size is still representable. */
  const size_t max_elems = (size_t)-1 / sizeof(unsigned);

  /* Guard before multiplying: a product evaluated in unsigned arithmetic
     could wrap and silently produce an undersized buffer. */
  if (bound2 != 0 && (size_t)bound1 > max_elems / bound2)
    return NULL;

  return (unsigned *)malloc((size_t)bound1 * bound2 * sizeof(unsigned));
}
23
/* Sets every element of a bound1 x bound2 matrix of unsigned counters to
   zero. A NULL pointer or an empty matrix is a no-op. */
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) {
  /* Count elements in size_t so the product is not truncated to the width of
     unsigned on targets where size_t is wider. */
  const size_t count = (size_t)bound1 * bound2;

  if (p == NULL || count == 0)
    return;

  memset(p, 0, count * sizeof(unsigned));
}
27
/* Releases storage referenced by p by handing it to free(); a NULL argument
   is accepted because free() ignores it. */
void Free(unsigned *p) {
  free(p);
}
29
/* Returns the address of element (i, j) of the row-major matrix p whose rows
   hold bound2 elements each. No bounds checking is performed. */
unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) {
  /* Flattened offset, computed in size_t so it is not truncated to the width
     of unsigned on targets where size_t is wider. */
  const size_t offset = (size_t)i * bound2 + j;

  return p + offset;
}
33
34int test(unsigned upper_bound) {
35
36 unsigned total_iterations = upper_bound * (upper_bound - 1) / 2;
37 unsigned num_threads = omp_get_max_threads();
38 unsigned lower_per_chunk = total_iterations / num_threads;
39 unsigned upper_per_chunk =
40 lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0);
41 int i, j;
42
43 omp_set_num_threads(num_threads);
44
45 ZeroOut(execution_count, upper_bound, upper_bound);
46 ZeroOut(iterations_per_thread, num_threads, 1);
47
48#ifdef VERBOSE
49 fprintf(stderr,
50 "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] "
51 "chunks "
52 "loop type lower triangle <,< - ",
53 num_threads, upper_bound, total_iterations, lower_per_chunk,
54 upper_per_chunk);
55#endif
56
57#pragma omp parallel shared(iterations_per_thread, execution_count)
58 { /* begin of parallel */
59 /* Lower triangular execution_count matrix */
60#pragma omp for schedule(static) collapse(2)
61 for (i = 0; i < upper_bound; i++) {
62 for (j = 0; j < i; j++) {
63 (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++;
64 (*Index(execution_count, i, j, upper_bound))++;
65 }
66 } /* end of for*/
67 } /* end of parallel */
68
69 /* check the execution_count array */
70 for (i = 0; i < upper_bound; i++) {
71 for (j = 0; j < i; j++) {
72 unsigned value = *Index(execution_count, i, j, upper_bound);
73 /* iteration with j<=i are valid, but should have been executed only once
74 */
75 if (value != 1) {
76 fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %i times.\n",
77 i, j, value);
78 return 0;
79 }
80 }
81 for (j = i; j < upper_bound; j++) {
82 unsigned value = *Index(execution_count, i, j, upper_bound);
83 /* iteration with j>=i are invalid and should not have been executed
84 */
85 if (value > 0) {
86 fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %i times.\n",
87 i, j, value);
88 return 0;
89 }
90 }
91 }
92
93#ifndef NO_EFFICIENCY_CHECK
94 /* Ensure the number of iterations executed by each thread is within bounds */
95 for (i = 0; i < num_threads; i++) {
96 unsigned value = *Index(iterations_per_thread, i, 0, 1);
97 if (value < lower_per_chunk || value > upper_per_chunk) {
98 fprintf(stderr,
99 "ERROR: Inefficient Collapse thread %d of %d assigned %i "
100 "iterations; must be between %d and %d\n",
101 i, num_threads, value, lower_per_chunk, upper_per_chunk);
102 return 0;
103 }
104 }
105#endif
106#ifdef VERBOSE
107 fprintf(stderr, "PASSED\r\n");
108#endif
109 return 1;
110}
111
112int main() {
113
116
117 for (unsigned j = 0; j < MAX_BOUND; j++) {
118 if (!test(j))
119 return 1;
120 }
123 return 0;
124}
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync p
#define i
Definition: kmp_stub.cpp:87
#define omp_set_num_threads
Definition: kmp_stub.cpp:34
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2)
unsigned * iterations_per_thread
unsigned * Index(unsigned *p, unsigned i, unsigned j, unsigned bound2)
unsigned * execution_count
void Free(unsigned *p)
int test(unsigned upper_bound)
unsigned * Alloc(unsigned bound1, unsigned bound2)
int omp_get_max_threads()