/*
 * Type aliases for the three loop levels (0, 1, 2). Every alias currently
 * expands to LOOP_TYPES, which is not defined in the visible lines.
 */
6#define LOOP_IV_TYPE0 LOOP_TYPES
7#define LOOP_TYPE0 LOOP_TYPES
8#define LOOP_STYPE0 LOOP_TYPES
10#define LOOP_IV_TYPE1 LOOP_TYPES
11#define LOOP_TYPE1 LOOP_TYPES
12#define LOOP_STYPE1 LOOP_TYPES
14#define LOOP_IV_TYPE2 LOOP_TYPES
15#define LOOP_TYPE2 LOOP_TYPES
16#define LOOP_STYPE2 LOOP_TYPES
/* Upper limit that num_threads is clamped to before the parallel region. */
18#define MAX_THREADS 256
/* Output wrapper: forwards its arguments unchanged to printf. */
21#define PRINTF(...) printf(__VA_ARGS__)
/*
 * File-scope variables for loop levels 1 (j) and 2 (k).
 * NOTE(review): presumably bounds and stride consumed by the LOOP macro --
 * confirm against its definition, which is not visible here.
 */
31LOOP_STYPE1 jA1, jB1, jStep;
32LOOP_STYPE2 kA1, kB1, kStep;
/*
 * Allocates an array of `size` spaceType elements with malloc and
 * zero-fills it with memset.
 *
 * NOTE(review): the malloc result is handed to memset without a NULL check,
 * so an allocation failure is dereferenced. calloc(size, sizeof(spaceType))
 * would allocate, zero, and guard the size multiplication in one call.
 * The end of this function (return statement / closing brace) is not
 * visible in this excerpt.
 */
74spaceType *AllocSpace(
unsigned size) {
76 spaceType *
p = (spaceType *)malloc(
size *
sizeof(spaceType));
77 memset(
p, 0,
size *
sizeof(spaceType));
81void FreeSpace(spaceType *space) { free(space); }
/*
 * Records one loop iteration (i, j, k) into `space`.
 *
 * Only part of the body is visible: a check of `count > trueCount` and a
 * store of k into space[count - 1].k, i.e. `count` is used as a 1-based
 * slot number. What happens inside the `count > trueCount` branch is not
 * visible in this excerpt.
 *
 * NOTE(review): k is declared as LOOP_IV_TYPE0 while i and j use
 * LOOP_IV_TYPE0 and LOOP_IV_TYPE1; LOOP_IV_TYPE2 looks intended. All three
 * expand to LOOP_TYPES, so this makes no difference unless the aliases
 * diverge.
 */
84void Set(spaceType *space,
unsigned count,
unsigned trueCount, LOOP_IV_TYPE0
i,
85 LOOP_IV_TYPE1
j, LOOP_IV_TYPE0 k) {
86 if (
count > trueCount) {
93 space[
count - 1].k = k;
/*
 * Fragment of the test driver body; the function header and several
 * statements (including the loop nests themselves) are not visible here.
 */
/* Record arrays filled through Set(): one for the OpenMP run, one for the
 * run counted by scalarCount. */
101 spaceType *openmpSpace;
102 spaceType *scalarSpace;
/* Iteration counters for the counting pass, the OpenMP run and the scalar run. */
104 unsigned trueCount = 0;
105 unsigned openmpCount = 0;
106 unsigned scalarCount = 0;
107 unsigned uselessThreadsOpenMP = 0;
/* NOTE(review): usefulThreadsOpenMP is not referenced in the visible lines. */
108 unsigned usefulThreadsOpenMP = 0;
/* The statement guarded by the num_threads == 0 check is not visible here. */
115 if (num_threads == 0)
/* Clamp the thread count to MAX_THREADS. */
118 if (num_threads > MAX_THREADS)
119 num_threads = MAX_THREADS;
/*
 * One iteration counter per thread, zero-initialised.
 * NOTE(review): the malloc result is not checked before memset;
 * calloc(num_threads, sizeof(unsigned long)) would cover both steps.
 */
121 unsigned long *chunkSizesOpenmp =
122 (
unsigned long *)malloc(
sizeof(
unsigned long) * num_threads);
123 memset(chunkSizesOpenmp, 0,
sizeof(
unsigned long) * num_threads);
/* Counting pass: run the LOOP macro (defined elsewhere) once to learn how
 * many iterations there are. */
126 LOOP { ++trueCount; }
/* Both record arrays are sized to hold trueCount entries. */
128 openmpSpace = AllocSpace(trueCount);
129 scalarSpace = AllocSpace(trueCount);
/* Record the current (i, j, k) in the scalar array; the enclosing loop and
 * the update of scalarCount are not visible here. */
134 Set(scalarSpace, scalarCount, trueCount,
i,
j, k);
/* Parallel run with the (possibly clamped) thread count. */
139#pragma omp parallel num_threads(num_threads)
/* Thread id, used below to index this thread's slot in chunkSizesOpenmp. */
141 unsigned gtid = omp_get_thread_num();
/* Worksharing loop over three collapsed loop levels; the loop nest itself is
 * not visible in this excerpt. */
142#pragma omp for collapse(3) private(i, j, k)
/* Count this iteration against the executing thread. */
145#pragma omp atomic update
146 ++chunkSizesOpenmp[gtid];
/* Atomically increment the shared counter and capture the new value, which
 * Set() uses as this iteration's slot number. */
147#pragma omp atomic capture
148 count = ++openmpCount;
149 Set(openmpSpace,
count, trueCount,
i,
j, k);
/*
 * Check 1: the OpenMP run must execute exactly trueCount iterations.
 * NOTE(review): openmpCount and trueCount are unsigned but printed with %d;
 * %u is the matching conversion.
 */
155 if (openmpCount < trueCount) {
156 PRINTF(
"OpenMP FAILURE: Openmp processed fewer iterations: %d vs %d\n",
157 openmpCount, trueCount);
159 }
else if (openmpCount > trueCount) {
160 PRINTF(
"OpenMP FAILURE: Openmp processed more iterations: %d vs %d\n",
161 openmpCount, trueCount);
/*
 * Check 2: every record in scalarSpace must have a record in openmpSpace
 * with the same (i, j, k). The inner index j is declared outside the
 * visible lines.
 */
166 for (
unsigned i = 0;
i < trueCount;
i++) {
168 for (
j = 0;
j < openmpCount;
j++) {
169 if ((scalarSpace[
i].
i == openmpSpace[
j].
i) &&
170 (scalarSpace[
i].
j == openmpSpace[
j].
j) &&
171 (scalarSpace[
i].k == openmpSpace[
j].k)) {
/* j equal to openmpCount means the search ran off the end without a match.
 * NOTE(review): %d assumes the i/j/k fields are int-compatible -- confirm
 * against the spaceType definition. */
175 if (
j == openmpCount) {
176 PRINTF(
"OpenMP FAILURE: (%d %d %d) not processed\n", scalarSpace[
i].
i,
177 scalarSpace[
i].
j, scalarSpace[
i].k);
/* Check 3: count the threads that executed no iterations. */
183 for (
unsigned i = 0;
i < num_threads; ++
i) {
184 if (chunkSizesOpenmp[
i] == 0) {
185 ++uselessThreadsOpenMP;
/* Failure when all threads but one were idle although there was more than
 * one iteration. */
190 if ((uselessThreadsOpenMP == num_threads - 1) && (trueCount > 1)) {
191 PRINTF(
"OpenMP FAILURE: threads are not used\n");
/* Failure when some thread was idle although there were at least as many
 * iterations as threads.
 * NOTE(review): both arguments are unsigned but printed with %d. */
199 if ((trueCount >= num_threads) && (uselessThreadsOpenMP > 0)) {
200 PRINTF(
"OpenMP FAILURE: %d threads not used with %d iterations\n",
201 uselessThreadsOpenMP, openmpCount);
/* Release both record arrays and the per-thread counters. */
207 FreeSpace(openmpSpace);
208 FreeSpace(scalarSpace);
209 free(chunkSizesOpenmp);
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
int test(unsigned upper_bound)
int omp_get_max_threads()