LLVM OpenMP 19.0.0git
kmp_sch_simd_runtime_api.c
Go to the documentation of this file.
1// RUN: %libomp-compile-and-run
2
3// The test checks schedule(simd:runtime)
4// in combination with omp_set_schedule()
5#include <stdio.h>
6#include <stdlib.h>
7#include <omp.h>
8
// Platform shims: delay() pauses the calling thread briefly and seten() sets
// an environment variable. On Windows the third argument of seten() is
// dropped because _putenv_s() takes only a name and a value.
// NOTE(review): both delay() definitions end in ';', so `delay();` expands to
// two statements. Neither macro is referenced in the visible part of this
// listing.
9#if defined(WIN32) || defined(_WIN32)
10#include <windows.h>
11#define delay() Sleep(1);
12#define seten(a,b,c) _putenv_s((a),(b))
13#else
14#include <unistd.h>
15#define delay() usleep(10);
16#define seten(a,b,c) setenv((a),(b),(c))
17#endif
18
// Multiplier applied to the requested chunk size when run_loop computes the
// chunk granularity it checks against (ch = lchunk * SIMD_LEN).
19#define SIMD_LEN 4
// Running count of failed checks; main() returns 1 when it is non-zero.
// NOTE(review): incremented with ++err from code that runs inside
// `#pragma omp parallel` regions, with no synchronization visible here.
20int err = 0;
21
22// ---------------------------------------------------------------------------
23// Various definitions copied from OpenMP RTL.
// Schedule kinds passed to the __kmpc_dispatch_init_* entry points.
// NOTE(review): the enumerator lines (source lines 25-27) are missing from
// this listing. The symbol index at the end of the page names
// kmp_sch_static_balanced_chunked, kmp_sch_guided_simd and
// kmp_sch_runtime_simd -- presumably the omitted members; confirm against the
// original file.
24enum sched {
28};
// Integer aliases. In the visible code i64 appears in the
// __kmpc_dispatch_init_8 prototype and u64 holds iteration counts in run_loop;
// u32 is not referenced in the visible lines.
29typedef unsigned u32;
30typedef long long i64;
31typedef unsigned long long u64;
// Descriptor type whose address is passed as the first argument of the
// __kmpc_* calls in this file.
32typedef struct {
33 int reserved_1;
34 int flags;
35 int reserved_2;
36 int reserved_3;
37 char *psource; // String field; the `loc` instance stores ";file;func;0;0;;".
38} id;
39
// Prototypes for the runtime entry points used by this test, given C linkage
// when the file is compiled as C++.
// NOTE(review): source line 43 is missing from this listing. run_loop calls
// __kmpc_global_thread_num(&loc), so that line presumably declares
// `int __kmpc_global_thread_num(id*);` -- confirm against the original file.
// Only __kmpc_barrier and __kmpc_dispatch_next_4 are called by name in the
// visible lines; the *_8 variants are declared but not used there.
40#ifdef __cplusplus
41extern "C" {
42#endif
44 void __kmpc_barrier(id*, int gtid);
45 void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
46 void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
47 int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
48 int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
49#ifdef __cplusplus
50} // extern "C"
51#endif
52// End of definitions copied from OpenMP RTL.
53// ---------------------------------------------------------------------------
// Single descriptor instance whose address is handed to every __kmpc_* call in
// this file. Fields in order: reserved_1 = 0, flags = 2, reserved_2 = 0,
// reserved_3 = 0, psource = ";file;func;0;0;;".
// NOTE(review): the meaning of flags == 2 is defined by the runtime, not here.
54static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
55
56// ---------------------------------------------------------------------------
// Pulls chunks of the iteration space [loop_lb, loop_ub] (stride loop_st) from
// __kmpc_dispatch_next_4 and validates each one. Every violated check prints a
// message and increments the global `err`. Returns early, without reaching the
// barrier at the end, for a zero stride or an empty iteration space.
// NOTE(review): this listing skips source line 58 here; presumably it holds
// the function name `run_loop(` (see the closing `// run_loop` comment) --
// confirm against the original file.
57void
59 int loop_lb, // Loop lower bound.
60 int loop_ub, // Loop upper bound.
61 int loop_st, // Loop stride.
62 int lchunk // Requested chunk size; 0 means no chunk size was given.
63) {
// NOTE(review): loop_sync, rc and tc are not referenced again in the visible
// lines of this function.
64 static int volatile loop_sync = 0;
65 int lb; // Chunk lower bound.
66 int ub; // Chunk upper bound.
67 int st; // Chunk stride.
68 int rc;
69 int nthreads = omp_get_num_threads();
70 int tid = omp_get_thread_num();
71 int gtid = __kmpc_global_thread_num(&loc);
72 int last;
// NOTE(review): this division by loop_st runs before the loop_st == 0 guard
// further down.
73 int tc = (loop_ub - loop_lb) / loop_st + 1;
74 int ch;
75 int no_chunk = 0;
// A zero chunk request is remembered in no_chunk and then treated as 1 so
// that ch below is never zero for this case.
76 if (lchunk == 0) {
77 no_chunk = 1;
78 lchunk = 1;
79 }
// Granularity used by the chunk-size checks in the loop below.
80 ch = lchunk * SIMD_LEN;
81#if _DEBUG > 1
82 printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
83 gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
84#endif
85 // Don't test degenerate cases that should have been discovered by codegen.
86 if (loop_st == 0)
87 return;
88 if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
89 return;
// NOTE(review): source line 90 is missing from this listing. The line below is
// the tail of a call; presumably it begins with
// `__kmpc_dispatch_init_4(&loc, gtid, <enum sched value>,` -- confirm against
// the original file.
91 loop_lb, loop_ub, loop_st, SIMD_LEN);
92 {
93 // Let the master thread handle the chunks alone.
// NOTE(review): no master-only guard is visible here; every thread that calls
// run_loop executes the loop below.
94 int chunk; // No of current chunk.
95 int last_ub; // Upper bound of the last processed chunk.
96 u64 cur; // Number of iterations in current chunk.
97 u64 max; // Max allowed iterations for current chunk.
98 int undersized = 0;
99 last_ub = loop_ub;
100 chunk = 0;
101 max = (loop_ub - loop_lb) / loop_st + 1;
102 // The first chunk can consume all iterations.
103 while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
104 ++ chunk;
105#if _DEBUG
106 printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
107 tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
108#endif
109 // Check if previous chunk (it is not the final chunk) is undersized.
110 if (undersized)
111 printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
// The chunk must stay inside the loop bounds and be non-empty, with the
// comparison direction following the sign of the stride.
112 if (loop_st > 0) {
113 if (!(ub <= loop_ub))
114 printf("Error with ub %d, %d, ch %d, err %d\n",
115 (int)ub, (int)loop_ub, chunk, ++err);
116 if (!(lb <= ub))
117 printf("Error with bounds %d, %d, %d, err %d\n",
118 (int)lb, (int)ub, chunk, ++err);
119 } else {
120 if (!(ub >= loop_ub))
121 printf("Error with ub %d, %d, %d, err %d\n",
122 (int)ub, (int)loop_ub, chunk, ++err);
123 if (!(lb >= ub))
124 printf("Error with bounds %d, %d, %d, err %d\n",
125 (int)lb, (int)ub, chunk, ++err);
126 }; // if
127 // Stride should not change.
128 if (!(st == loop_st))
129 printf("Error with st %d, %d, ch %d, err %d\n",
130 (int)st, (int)loop_st, chunk, ++err);
// Iteration count of this chunk, computed in int and stored in a u64.
131 cur = ( ub - lb ) / loop_st + 1;
132 // Guided scheduling uses FP computations, so current chunk may
133 // be a bit bigger (+1) than allowed maximum.
134 if (!( cur <= max + 1))
135 printf("Error with iter %llu, %llu, err %d\n", cur, max, ++err);
136 // The final chunk may not exceed ch when a chunk size was requested and
// more than one thread is running; any other chunk must be a multiple of ch.
// NOTE(review): `cur > ch` compares a u64 with an int.
137 if (last) {
138 if (!no_chunk && cur > ch && nthreads > 1)
139 printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
140 (int)cur, ch, tid, ++err);
141 } else {
142 if (cur % ch)
143 printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
144 chunk, (int)cur, ch, tid, ++err);
145 }
// Update maximum for the next chunk: later chunks are checked against the
// smallest chunk size seen so far.
146 if (cur < max)
147 max = cur;
148 last_ub = ub;
// Remembered so that the next iteration, if there is one, reports an error.
149 undersized = (cur < ch);
150#if _DEBUG > 1
// NOTE(review): cur is a u64 but is printed with %d below.
151 if (last)
152 printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
153 undersized,cur,ch,tid,ub,lb,loop_st);
154#endif
155 } // while
156 // Must have the right last iteration index.
// NOTE(review): `last` is only written through the pointer given to
// __kmpc_dispatch_next_4; whether it is set when the very first call returns 0
// is not visible here.
157 if (loop_st > 0) {
158 if (!(last_ub <= loop_ub))
159 printf("Error with last1 %d, %d, ch %d, err %d\n",
160 (int)last_ub, (int)loop_ub, chunk, ++err);
161 if (last && !(last_ub + loop_st > loop_ub))
162 printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
163 (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
164 } else {
165 if (!(last_ub >= loop_ub))
166 printf("Error with last1 %d, %d, ch %d, err %d\n",
167 (int)last_ub, (int)loop_ub, chunk, ++err);
168 if (last && !(last_ub + loop_st < loop_ub))
169 printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
170 (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
171 } // if
172 }
// Barrier call made by every thread that did not take an early return above.
173 __kmpc_barrier(&loc, gtid);
174} // run_loop
175
// Runs run_loop(0, 26, 1, chunk) inside a parallel region once for each
// schedule configuration named in the comments below, then prints
// "passed" and returns 0 if err is still zero, or prints the error count and
// returns 1 otherwise. argc and argv are not used.
// NOTE(review): this listing skips one source line before each
// `#pragma omp parallel` (180, 185, 191, 196, 201, 206, 211); presumably the
// omp_set_schedule() calls that select the schedule named in each comment --
// confirm against the original file.
176int main(int argc, char *argv[])
177{
178 int chunk = 0;
179// static (no chunk)
181#pragma omp parallel// num_threads(num_th)
182 run_loop(0, 26, 1, chunk);
183
184// auto (chunk should be ignored)
186#pragma omp parallel// num_threads(num_th)
187 run_loop(0, 26, 1, chunk);
188
189// static,1
// chunk stays 1 for every remaining call, including the ",0" cases below.
190 chunk = 1;
192#pragma omp parallel// num_threads(num_th)
193 run_loop(0, 26, 1, chunk);
194
195// dynamic,1
197#pragma omp parallel// num_threads(num_th)
198 run_loop(0, 26, 1, chunk);
199
200// guided,1
202#pragma omp parallel// num_threads(num_th)
203 run_loop(0, 26, 1, chunk);
204
205// dynamic,0 - use default chunk size 1
207#pragma omp parallel// num_threads(num_th)
208 run_loop(0, 26, 1, chunk);
209
210// guided,0 - use default chunk size 1
212#pragma omp parallel// num_threads(num_th)
213 run_loop(0, 26, 1, chunk);
214
215 if (err) {
216 printf("failed, err = %d\n", err);
217 return 1;
218 } else {
219 printf("passed\n");
220 return 0;
221 }
222}
@ omp_sched_dynamic
Definition: kmp.h:4477
@ omp_sched_auto
Definition: kmp.h:4479
@ omp_sched_guided
Definition: kmp.h:4478
@ omp_sched_static
Definition: kmp.h:4476
long long i64
unsigned long long u64
void __kmpc_barrier(id *, int gtid)
long long i64
void __kmpc_dispatch_init_8(id *, int, enum sched, i64, i64, i64, i64)
void __kmpc_dispatch_init_4(id *, int, enum sched, int, int, int, int)
#define SIMD_LEN
void run_loop(int loop_lb, int loop_ub, int loop_st, int lchunk)
static id loc
int __kmpc_global_thread_num(id *)
@ kmp_sch_runtime_simd
@ kmp_sch_guided_simd
@ kmp_sch_static_balanced_chunked
unsigned long long u64
unsigned u32
int __kmpc_dispatch_next_8(id *, int, void *, void *, void *, void *)
int __kmpc_dispatch_next_4(id *, int, void *, void *, void *, void *)
#define omp_set_schedule
Definition: kmp_stub.cpp:30
int omp_get_num_threads()
int main()
Definition: test-touch.c:21