LLVM OpenMP 20.0.0git
task_teams_stress_test.cpp
Go to the documentation of this file.
1// RUN: %libomp-cxx-compile
2// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
3// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
4// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
5// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
6// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
7// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
8// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
9//
10// RUN: %libomp-cxx-compile -DUSE_HIDDEN_HELPERS=1
11// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
12// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
13// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
14// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
15// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
16// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
17// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
18
19// This test stresses the task team mechanism by running a simple
20// increment task over and over with varying number of threads and nesting.
21// The test covers nested serial teams and mixing serial teams with
22// normal active teams.
23
24#include <assert.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <omp.h>
28
29// The number of times to run each test
30#define NTIMES 5
31
// Plain increment task: spawns one OpenMP task that atomically adds one
// to the counter that `a` points to.
void task_inc_a(int *a) {
#pragma omp task
  {
#pragma omp atomic
    *a += 1;
  }
}
40
// Binary-splitting increment task over the inclusive range [low, high].
// Each task either halves its range and spawns a child task per half, or,
// once the range has shrunk to a single value, atomically adds one to *a.
// An empty range (low > high) creates a task that does nothing.
void task_inc_split_a(int *a, int low, int high) {
#pragma omp task firstprivate(low, high)
  {
    if (low < high) {
      // Split point computed relative to low, as in (high - low) / 2 + low.
      int mid = low + (high - low) / 2;
      task_inc_split_a(a, low, mid);
      task_inc_split_a(a, mid + 1, high);
    } else if (low == high) {
#pragma omp atomic
      *a += 1;
    }
  }
}
55
56#ifdef USE_HIDDEN_HELPERS
// Increment issued from a `target ... nowait` region with a[0] mapped
// tofrom. Hidden helper tasks force serial regions to create task teams.
void task_inc_a_hidden_helper(int *a) {
#pragma omp target map(tofrom : a[0]) nowait
  {
#pragma omp atomic
    *a += 1;
  }
}
65#else
66// Detached tasks force serial regions to create task teams
67void task_inc_a_detached(int *a, omp_event_handle_t handle) {
68#pragma omp task detach(handle)
69 {
70#pragma omp atomic
71 (*a)++;
72 omp_fulfill_event(handle);
73 }
74}
75#endif
76
// Verify that the counter behind `a` holds `expected`. On a mismatch, print
// a diagnostic to stderr and terminate the process with EXIT_FAILURE;
// otherwise return normally.
void check_a(int *a, int expected) {
  if (*a == expected)
    return;

  fprintf(stderr,
          "FAIL: a = %d instead of expected = %d. Compile with "
          "-DVERBOSE for more verbose output.\n",
          *a, expected);
  exit(EXIT_FAILURE);
}
86
87// Every thread creates a single "increment" task
88void test_tasks(omp_event_handle_t *handles, int expected, int *a) {
89 int tid = omp_get_thread_num();
90
92
93#pragma omp barrier
94 check_a(a, expected);
95#pragma omp barrier
96 check_a(a, expected);
97#pragma omp barrier
98
99#ifdef USE_HIDDEN_HELPERS
100 task_inc_a_hidden_helper(a);
101#else
102 task_inc_a_detached(a, handles[tid]);
103#endif
104
105#pragma omp barrier
106 check_a(a, 2 * expected);
107#pragma omp barrier
108 task_inc_a(a);
109#pragma omp barrier
110 check_a(a, 3 * expected);
111}
112
113// Testing single level of parallelism with increment tasks
114void test_base(int nthreads) {
115#ifdef VERBOSE
116#pragma omp master
117 printf(" test_base(%d)\n", nthreads);
118#endif
119 int a = 0;
120 omp_event_handle_t *handles;
121 handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
122#pragma omp parallel num_threads(nthreads) shared(a)
123 { test_tasks(handles, nthreads, &a); }
124 free(handles);
125}
126
127// Testing nested parallel with increment tasks
128// first = nthreads of outer parallel
129// second = nthreads of nested parallel
130void test_nest(int first, int second) {
131#ifdef VERBOSE
132#pragma omp master
133 printf(" test_nest(%d, %d)\n", first, second);
134#endif
135#pragma omp parallel num_threads(first)
136 { test_base(second); }
137}
138
139// Testing 2-level nested parallels with increment tasks
140// first = nthreads of outer parallel
141// second = nthreads of nested parallel
142// third = nthreads of second nested parallel
143void test_nest2(int first, int second, int third) {
144#ifdef VERBOSE
145#pragma omp master
146 printf(" test_nest2(%d, %d, %d)\n", first, second, third);
147#endif
148#pragma omp parallel num_threads(first)
149 { test_nest(second, third); }
150}
151
152// Testing 3-level nested parallels with increment tasks
153// first = nthreads of outer parallel
154// second = nthreads of nested parallel
155// third = nthreads of second nested parallel
156// fourth = nthreads of third nested parallel
157void test_nest3(int first, int second, int third, int fourth) {
158#ifdef VERBOSE
159#pragma omp master
160 printf(" test_nest3(%d, %d, %d, %d)\n", first, second, third, fourth);
161#endif
162#pragma omp parallel num_threads(first)
163 { test_nest2(second, third, fourth); }
164}
165
166// Testing 4-level nested parallels with increment tasks
167// first = nthreads of outer parallel
168// second = nthreads of nested parallel
169// third = nthreads of second nested parallel
170// fourth = nthreads of third nested parallel
171// fifth = nthreads of fourth nested parallel
172void test_nest4(int first, int second, int third, int fourth, int fifth) {
173#ifdef VERBOSE
174#pragma omp master
175 printf("test_nest4(%d, %d, %d, %d, %d)\n", first, second, third, fourth,
176 fifth);
177#endif
178#pragma omp parallel num_threads(first)
179 { test_nest3(second, third, fourth, fifth); }
180}
181
182// Single thread starts a binary splitting "increment" task
183// Detached tasks are still single "increment" task
184void test_tasks_split(omp_event_handle_t *handles, int expected, int *a) {
185 int tid = omp_get_thread_num();
186
187#pragma omp single
188 task_inc_split_a(a, 1, expected); // task team A
189
190#pragma omp barrier
191 check_a(a, expected);
192#pragma omp barrier
193 check_a(a, expected);
194#pragma omp barrier
195
196#ifdef USE_HIDDEN_HELPERS
197 task_inc_a_hidden_helper(a);
198#else
199 task_inc_a_detached(a, handles[tid]);
200#endif
201
202#pragma omp barrier
203 check_a(a, 2 * expected);
204#pragma omp barrier
205#pragma omp single
206 task_inc_split_a(a, 1, expected); // task team B
207#pragma omp barrier
208 check_a(a, 3 * expected);
209}
210
211// Testing single level of parallelism with splitting incrementing tasks
212void test_base_split(int nthreads) {
213#ifdef VERBOSE
214#pragma omp master
215 printf(" test_base_split(%d)\n", nthreads);
216#endif
217 int a = 0;
218 omp_event_handle_t *handles;
219 handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
220#pragma omp parallel num_threads(nthreads) shared(a)
221 { test_tasks_split(handles, nthreads, &a); }
222 free(handles);
223}
224
225// Testing nested parallels with splitting tasks
226// first = nthreads of outer parallel
227// second = nthreads of nested parallel
228void test_nest_split(int first, int second) {
229#ifdef VERBOSE
230#pragma omp master
231 printf(" test_nest_split(%d, %d)\n", first, second);
232#endif
233#pragma omp parallel num_threads(first)
234 { test_base_split(second); }
235}
236
237// Testing doubly nested parallels with splitting tasks
238// first = nthreads of outer parallel
239// second = nthreads of nested parallel
240// third = nthreads of second nested parallel
241void test_nest2_split(int first, int second, int third) {
242#ifdef VERBOSE
243#pragma omp master
244 printf("test_nest2_split(%d, %d, %d)\n", first, second, third);
245#endif
246#pragma omp parallel num_threads(first)
247 { test_nest_split(second, third); }
248}
249
// Call func(args...) `n` times back to back, passing the same arguments on
// every call. Nothing is called when n is zero or negative.
template <typename... Args>
void run_ntimes(int n, void (*func)(Args...), Args... args) {
  int remaining = n;
  while (remaining > 0) {
    func(args...);
    --remaining;
  }
}
256
257int main() {
259
272 run_ntimes(NTIMES, test_nest2, 1, 1, 2);
273 run_ntimes(NTIMES, test_nest2, 1, 2, 1);
274 run_ntimes(NTIMES, test_nest2, 2, 2, 1);
275 run_ntimes(NTIMES, test_nest2, 2, 1, 1);
276 run_ntimes(NTIMES, test_nest2, 4, 2, 1);
277 run_ntimes(NTIMES, test_nest2, 4, 2, 2);
278 run_ntimes(NTIMES, test_nest2, 1, 1, 1);
279 run_ntimes(NTIMES, test_nest2, 4, 2, 2);
280 run_ntimes(NTIMES, test_nest3, 1, 1, 1, 1);
281 run_ntimes(NTIMES, test_nest3, 1, 2, 1, 1);
282 run_ntimes(NTIMES, test_nest3, 1, 1, 2, 1);
283 run_ntimes(NTIMES, test_nest3, 1, 1, 1, 2);
284 run_ntimes(NTIMES, test_nest3, 2, 1, 1, 1);
285 run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1);
286 run_ntimes(NTIMES, test_nest4, 2, 1, 1, 1, 1);
287 run_ntimes(NTIMES, test_nest4, 1, 2, 1, 1, 1);
288 run_ntimes(NTIMES, test_nest4, 1, 1, 2, 1, 1);
289 run_ntimes(NTIMES, test_nest4, 1, 1, 1, 2, 1);
290 run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 2);
291 run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1);
292 run_ntimes(NTIMES, test_nest4, 1, 2, 1, 2, 1);
293
296
298
302
306
315
316 printf("PASS\n");
317 return EXIT_SUCCESS;
318}
void test_tasks()
#define args
#define i
Definition: kmp_stub.cpp:87
#define omp_set_max_active_levels
Definition: kmp_stub.cpp:29
int a
void func(int *num_exec)
void task_inc_a_detached(int *a, omp_event_handle_t handle)
void test_nest_split(int first, int second)
void test_nest2_split(int first, int second, int third)
void test_nest2(int first, int second, int third)
void test_tasks_split(omp_event_handle_t *handles, int expected, int *a)
void test_base_split(int nthreads)
void test_nest4(int first, int second, int third, int fourth, int fifth)
void test_nest3(int first, int second, int third, int fourth)
void run_ntimes(int n, void(*func)(Args...), Args... args)
#define NTIMES
void check_a(int *a, int expected)
void task_inc_split_a(int *a, int low, int high)
void test_base(int nthreads)
void task_inc_a(int *a)
void test_nest(int first, int second)