49 if (th->th.th_root->r.r_active &&
50 (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws !=
ct_none)) {
51#if KMP_USE_DYNAMIC_LOCK
65 if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws !=
ct_none) {
73 bool use_hier =
false) {
80 if (
loc != NULL &&
loc->get_openmp_version() < 50)
93#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
// Round a value to two decimal places, rounding halves away from zero.
// Used when apportioning loop chunks between performance/efficiency cores.
//
// The previous form, static_cast<int>(num * 100 + 0.5), relied on
// truncation toward zero and therefore mis-rounded negative inputs
// (e.g. -0.006 -> 0.00 instead of -0.01). Callers currently pass only
// non-negative ratios, for which the result is unchanged; negatives now
// round symmetrically, matching __kmp_get_round_val.
static inline float __kmp_round_2decimal_val(float num) {
  const float scaled = num * 100.0f;
  const int rounded =
      static_cast<int>(scaled < 0 ? scaled - 0.5 : scaled + 0.5);
  return static_cast<float>(rounded) / 100.0f;
}
// Round a float to the nearest integer, with halves rounded away from
// zero. The 0.5 literal is double, so the adjustment and the final
// truncating cast happen in double precision, exactly as before.
static inline int __kmp_get_round_val(float num) {
  if (num < 0)
    return static_cast<int>(num - 0.5);
  return static_cast<int>(num + 0.5);
}
107 typename traits_t<T>::unsigned_t nchunks, T nproc,
108 typename traits_t<T>::unsigned_t &init,
109 T &small_chunk, T &extras, T &p_extra) {
111#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
116 T pchunks = pr->
u.
p.pchunks;
117 T echunks = nchunks - pchunks;
118 T num_procs_with_pcore = pr->
u.
p.num_procs_with_pcore;
119 T num_procs_with_ecore = nproc - num_procs_with_pcore;
120 T first_thread_with_ecore = pr->
u.
p.first_thread_with_ecore;
122 pchunks / num_procs_with_pcore;
124 echunks / num_procs_with_ecore;
127 (pchunks % num_procs_with_pcore) + (echunks % num_procs_with_ecore);
129 p_extra = (big_chunk - small_chunk);
131 if (
type == KMP_HW_CORE_TYPE_CORE) {
132 if (
id < first_thread_with_ecore) {
133 init =
id * small_chunk +
id * p_extra + (
id < extras ?
id : extras);
135 init =
id * small_chunk + (
id - num_procs_with_ecore) * p_extra +
136 (
id < extras ?
id : extras);
139 if (
id == first_thread_with_ecore) {
140 init =
id * small_chunk +
id * p_extra + (
id < extras ?
id : extras);
142 init =
id * small_chunk + first_thread_with_ecore * p_extra +
143 (
id < extras ?
id : extras);
146 p_extra = (
type == KMP_HW_CORE_TYPE_CORE) ? p_extra : 0;
151 small_chunk = nchunks / nproc;
152 extras = nchunks % nproc;
154 init =
id * small_chunk + (
id < extras ?
id : extras);
157#if KMP_STATIC_STEAL_ENABLED
187 typename traits_t<T>::signed_t st,
191 typename traits_t<T>::signed_t chunk,
193 typedef typename traits_t<T>::unsigned_t UT;
194 typedef typename traits_t<T>::floating_t DBL;
204 typedef typename traits_t<T>::signed_t
ST;
209 "pr:%%p lb:%%%s ub:%%%s st:%%%s "
210 "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
211 traits_t<T>::spec, traits_t<T>::spec,
212 traits_t<ST>::spec, traits_t<ST>::spec,
213 traits_t<T>::spec, traits_t<T>::spec);
214 KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid));
220 team = th->th.th_team;
221 active = !team->
t.t_serialized;
224 int itt_need_metadata_reporting =
225 __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
227 team->
t.t_active_level == 1;
230#if KMP_USE_HIER_SCHED
267 schedule = team->
t.t_sched.r_sched_type;
281 chunk = team->
t.t_sched.chunk;
291 "schedule:%%d chunk:%%%s\n",
293 KD_TRACE(10, (buff, gtid, schedule, chunk));
314 "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
315 "schedule:%%d chunk:%%%s\n",
317 KD_TRACE(10, (buff, gtid, schedule, chunk));
322#if KMP_STATIC_STEAL_ENABLED
336 schedule = team->
t.t_sched.r_sched_type;
348 chunk = team->
t.t_sched.chunk * chunk;
359 "__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d"
362 KD_TRACE(10, (buff, gtid, schedule, chunk));
367 pr->
u.
p.parm1 = chunk;
370 "unknown scheduling type");
391 tc = (UT)(lb - ub) / (-st) + 1;
399 tc = (UT)(ub - lb) / st + 1;
417 pr->
u.
p.last_upper = ub + st;
425 pr->
u.
p.ordered_lower = 1;
426 pr->
u.
p.ordered_upper = 0;
431#if KMP_STATIC_STEAL_ENABLED
436 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",
439 ntc = (tc % chunk ? 1 : 0) + tc / chunk;
440 if (nproc > 1 && ntc >= nproc) {
443 T small_chunk, extras, p_extra = 0;
445 int claimed = pr->
steal_flag.compare_exchange_strong(old, CLAIMED);
446 if (traits_t<T>::type_size > 4) {
456#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
459 bool use_hybrid =
false;
461 T first_thread_with_ecore = 0;
462 T num_procs_with_pcore = 0;
463 T num_procs_with_ecore = 0;
464 T p_ntc = 0, e_ntc = 0;
466 __kmp_affinity.type != affinity_explicit) {
470 __kmp_first_osid_with_ecore > -1) {
471 for (
int i = 0;
i < team->
t.t_nproc; ++
i) {
473 ->th.th_topology_attrs.core_type;
474 int id = team->
t.t_threads[
i]->th.th_topology_ids.os_id;
475 if (
id == __kmp_first_osid_with_ecore) {
476 first_thread_with_ecore =
477 team->
t.t_threads[
i]->th.th_info.ds.ds_tid;
479 if (
type == KMP_HW_CORE_TYPE_CORE) {
480 num_procs_with_pcore++;
481 }
else if (
type == KMP_HW_CORE_TYPE_ATOM) {
482 num_procs_with_ecore++;
489 if (num_procs_with_pcore > 0 && num_procs_with_ecore > 0) {
490 float multiplier = 60.0 / 40.0;
491 float p_ratio = (float)num_procs_with_pcore / nproc;
492 float e_ratio = (float)num_procs_with_ecore / nproc;
495 (((multiplier * num_procs_with_pcore) / nproc) + e_ratio);
496 float p_multiplier = multiplier * e_multiplier;
497 p_ntc = __kmp_get_round_val(ntc * p_ratio * p_multiplier);
498 if ((
int)p_ntc > (
int)(ntc * p_ratio * p_multiplier))
500 (
int)(__kmp_round_2decimal_val(ntc * e_ratio * e_multiplier));
502 e_ntc = __kmp_get_round_val(ntc * e_ratio * e_multiplier);
507 use_hybrid = (use_hybrid && (p_ntc >= num_procs_with_pcore &&
508 e_ntc >= num_procs_with_ecore)
516 pr->
u.
p.pchunks = p_ntc;
517 pr->
u.
p.num_procs_with_pcore = num_procs_with_pcore;
518 pr->
u.
p.first_thread_with_ecore = first_thread_with_ecore;
522 T big_chunk = p_ntc / num_procs_with_pcore;
523 small_chunk = e_ntc / num_procs_with_ecore;
526 (p_ntc % num_procs_with_pcore) + (e_ntc % num_procs_with_ecore);
528 p_extra = (big_chunk - small_chunk);
530 if (core_type == KMP_HW_CORE_TYPE_CORE) {
531 if (
id < first_thread_with_ecore) {
533 id * small_chunk +
id * p_extra + (
id < extras ?
id : extras);
535 init =
id * small_chunk + (
id - num_procs_with_ecore) * p_extra +
536 (
id < extras ?
id : extras);
539 if (
id == first_thread_with_ecore) {
541 id * small_chunk +
id * p_extra + (
id < extras ?
id : extras);
543 init =
id * small_chunk + first_thread_with_ecore * p_extra +
544 (
id < extras ?
id : extras);
547 p_extra = (core_type == KMP_HW_CORE_TYPE_CORE) ? p_extra : 0;
551 small_chunk = ntc / nproc;
552 extras = ntc % nproc;
553 init =
id * small_chunk + (
id < extras ?
id : extras);
556 pr->
u.
p.count = init;
558 pr->
u.
p.ub = init + small_chunk + p_extra + (
id < extras ? 1 : 0);
570 pr->
u.
p.parm3 = nproc;
571 pr->
u.
p.parm4 = (
id + 1) % nproc;
576 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d switching to "
577 "kmp_sch_dynamic_chunked\n",
589 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",
599 pr->
u.
p.parm1 = (
id == tc - 1);
606 T small_chunk = tc / nproc;
607 T extras = tc % nproc;
608 init =
id * small_chunk + (
id < extras ?
id : extras);
609 limit = init + small_chunk - (
id < extras ? 0 : 1);
610 pr->
u.
p.parm1 = (
id == nproc - 1);
626 if (itt_need_metadata_reporting)
628 *cur_chunk = limit - init + 1;
631 pr->
u.
p.lb = lb + init;
632 pr->
u.
p.ub = lb + limit;
635 T ub_tmp = lb + limit * st;
636 pr->
u.
p.lb = lb + init * st;
640 pr->
u.
p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
642 pr->
u.
p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
646 pr->
u.
p.ordered_lower = init;
647 pr->
u.
p.ordered_upper = limit;
654 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
655 " -> falling-through to static_greedy\n",
659 pr->
u.
p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
668 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
673 if ((2L * chunk + 1) * nproc >= tc) {
680 *(
double *)&pr->
u.
p.parm3 =
684 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d falling-through to "
685 "kmp_sch_static_greedy\n",
691 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
698 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d "
699 "kmp_sch_guided_analytical_chunked case\n",
703 if ((2L * chunk + 1) * nproc >= tc) {
711#if KMP_USE_X87CONTROL
721 unsigned int oldFpcw = _control87(0, 0);
722 _control87(_PC_64, _MCW_PC);
726 long double target = ((
long double)chunk * 2 + 1) * nproc / tc;
733 x = 1.0 - 0.5 / (
double)nproc;
744 ptrdiff_t natural_alignment =
745 (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
749 (((ptrdiff_t)&pr->
u.
p.parm3) & (natural_alignment)) == 0);
754 *(DBL *)&pr->
u.
p.parm3 = x;
767 p = __kmp_pow<UT>(x, right);
772 }
while (
p >
target && right < (1 << 27));
780 while (left + 1 < right) {
781 mid = (left + right) / 2;
782 if (__kmp_pow<UT>(x, mid) >
target) {
792 __kmp_pow<UT>(x, cross) <=
target);
795 pr->
u.
p.parm2 = cross;
798#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
799#define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
801#define GUIDED_ANALYTICAL_WORKAROUND (x)
808#if KMP_USE_X87CONTROL
810 _control87(oldFpcw, _MCW_PC);
814 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d falling-through to "
815 "kmp_sch_static_greedy\n",
826 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
828 pr->
u.
p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
835 if (pr->
u.
p.parm1 <= 0)
837 else if (pr->
u.
p.parm1 > tc)
841 pr->
u.
p.parm2 = (tc / pr->
u.
p.parm1) + (tc % pr->
u.
p.parm1 ? 1 : 0);
842 KD_TRACE(100, (
"__kmp_dispatch_init_algorithm: T#%d "
843 "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
849 T parm1, parm2, parm3, parm4;
851 (
"__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",
857 parm2 = (tc / (2 * nproc));
867 }
else if (parm1 > parm2) {
872 parm3 = (parm2 + parm1);
873 parm3 = (2 * tc + parm3 - 1) / parm3;
881 parm4 = (parm2 - parm1) / parm4;
888 pr->
u.
p.parm1 = parm1;
889 pr->
u.
p.parm2 = parm2;
890 pr->
u.
p.parm3 = parm3;
891 pr->
u.
p.parm4 = parm4;
902 pr->schedule = schedule;
905#if KMP_USE_HIER_SCHED
907inline void __kmp_dispatch_init_hier_runtime(
ident_t *
loc, T lb, T ub,
908 typename traits_t<T>::signed_t st);
913 __kmp_dispatch_init_hierarchy<kmp_int32>(
921 __kmp_dispatch_init_hierarchy<kmp_uint32>(
929 __kmp_dispatch_init_hierarchy<kmp_int64>(
937 __kmp_dispatch_init_hierarchy<kmp_uint64>(
945 for (
int i = 0;
i < num_disp_buff; ++
i) {
949 &team->
t.t_disp_buffer[
i]);
951 sh->hier->deallocate();
963 T ub,
typename traits_t<T>::signed_t st,
964 typename traits_t<T>::signed_t chunk,
int push_ws) {
965 typedef typename traits_t<T>::unsigned_t UT;
986 SSC_MARK_DISPATCH_INIT();
989 typedef typename traits_t<T>::signed_t
ST;
994 "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
995 traits_t<ST>::spec, traits_t<T>::spec,
996 traits_t<T>::spec, traits_t<ST>::spec);
997 KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
1003 team = th->th.th_team;
1004 active = !team->
t.t_serialized;
1005 th->th.th_ident =
loc;
1016#if KMP_USE_HIER_SCHED
1022 my_buffer_index = th->th.th_dispatch->th_disp_index;
1033 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d ordered loop detected. "
1034 "Disabling hierarchical scheduling.\n",
1043 __kmp_dispatch_init_hier_runtime<T>(
loc, lb, ub, st);
1049 int itt_need_metadata_reporting =
1050 __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
1052 team->
t.t_active_level == 1;
1056 th->th.th_dispatch->th_disp_buffer);
1059 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1061 my_buffer_index = th->th.th_dispatch->th_disp_index++;
1069 KD_TRACE(10, (
"__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,
1071 if (sh->buffer_index != my_buffer_index) {
1072 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d"
1073 " sh->buffer_index:%d\n",
1074 gtid, my_buffer_index, sh->buffer_index));
1075 __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
1079 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
1080 "sh->buffer_index:%d\n",
1081 gtid, my_buffer_index, sh->buffer_index));
1089 chunk, (T)th->th.th_team_nproc,
1090 (T)th->th.th_info.ds.ds_tid);
1096 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
1097 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
1100 th->th.th_dispatch->th_dispatch_sh_current =
1104 __kmp_itt_ordered_init(gtid);
1107 if (itt_need_metadata_reporting) {
1115 cur_chunk = pr->
u.
p.parm1;
1131 __kmp_itt_metadata_loop(
loc, schedtype, pr->
u.
p.tc, cur_chunk);
1133#if KMP_USE_HIER_SCHED
1136 pr->
u.
p.ub = pr->
u.
p.lb = pr->
u.
p.st = pr->
u.
p.tc = 0;
1147 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
1149 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
1150 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
1151 traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
1152 traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
1153 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
1154 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
1156 pr->
u.
p.ub, pr->
u.
p.st, pr->
u.
p.tc, pr->
u.
p.count,
1157 pr->
u.
p.ordered_lower, pr->
u.
p.ordered_upper, pr->
u.
p.parm1,
1158 pr->
u.
p.parm2, pr->
u.
p.parm3, pr->
u.
p.parm4));
1162#if OMPT_SUPPORT && OMPT_OPTIONAL
1167 ompt_work_loop, ompt_scope_begin, &(team_info->
parallel_data),
1168 &(task_info->
task_data), pr->
u.
p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
1179template <
typename UT>
1181 typedef typename traits_t<UT>::signed_t
ST;
1185 KD_TRACE(100, (
"__kmp_dispatch_finish: T#%d called\n", gtid));
1186 if (!th->th.th_team->t.t_serialized) {
1190 th->th.th_dispatch->th_dispatch_pr_current);
1193 th->th.th_dispatch->th_dispatch_sh_current);
1197 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1202 (
"__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1206 UT lower = pr->
u.
p.ordered_lower;
1213 "ordered_iteration:%%%s lower:%%%s\n",
1214 traits_t<UT>::spec, traits_t<UT>::spec);
1215 KD_TRACE(1000, (buff, gtid, sh->
u.s.ordered_iteration, lower));
1220 __kmp_wait<UT>(&sh->
u.s.ordered_iteration, lower,
1228 "ordered_iteration:%%%s lower:%%%s\n",
1229 traits_t<UT>::spec, traits_t<UT>::spec);
1230 KD_TRACE(1000, (buff, gtid, sh->
u.s.ordered_iteration, lower));
1235 test_then_inc<ST>((
volatile ST *)&sh->
u.s.ordered_iteration);
1238 KD_TRACE(100, (
"__kmp_dispatch_finish: T#%d returned\n", gtid));
1241#ifdef KMP_GOMP_COMPAT
1243template <
typename UT>
1244static void __kmp_dispatch_finish_chunk(
int gtid,
ident_t *
loc) {
1245 typedef typename traits_t<UT>::signed_t
ST;
1249 KD_TRACE(100, (
"__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
1250 if (!th->th.th_team->t.t_serialized) {
1253 th->th.th_dispatch->th_dispatch_pr_current);
1256 th->th.th_dispatch->th_dispatch_sh_current);
1260 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1262 UT lower = pr->
u.
p.ordered_lower;
1263 UT upper = pr->
u.
p.ordered_upper;
1264 UT inc = upper - lower + 1;
1269 (
"__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1280 "__kmp_dispatch_finish_chunk: T#%%d before wait: "
1281 "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
1282 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
1283 KD_TRACE(1000, (buff, gtid, sh->
u.s.ordered_iteration, lower, upper));
1288 __kmp_wait<UT>(&sh->
u.s.ordered_iteration, lower,
1292 KD_TRACE(1000, (
"__kmp_dispatch_finish_chunk: T#%d resetting "
1293 "ordered_bumped to zero\n",
1302 "__kmp_dispatch_finish_chunk: T#%%d after wait: "
1303 "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
1304 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
1305 traits_t<UT>::spec);
1307 (buff, gtid, sh->
u.s.ordered_iteration, inc, lower, upper));
1312 test_then_add<ST>((
volatile ST *)&sh->
u.s.ordered_iteration, inc);
1316 KD_TRACE(100, (
"__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
1321template <
typename T>
1326 typename traits_t<T>::signed_t *p_st, T nproc,
1328 typedef typename traits_t<T>::unsigned_t UT;
1329 typedef typename traits_t<T>::signed_t
ST;
1330 typedef typename traits_t<T>::floating_t DBL;
1335 UT limit, trip, init;
1340 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1350 "sh:%%p nproc:%%%s tid:%%%s\n",
1351 traits_t<T>::spec, traits_t<T>::spec);
1352 KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid));
1358 if (pr->
u.
p.tc == 0) {
1360 (
"__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
1367#if KMP_STATIC_STEAL_ENABLED
1369 T chunk = pr->
u.
p.parm1;
1370 UT nchunks = pr->
u.
p.parm2;
1372 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",
1375 trip = pr->
u.
p.tc - 1;
1377 if (traits_t<T>::type_size > 4) {
1382 if (pr->
u.
p.count < (UT)pr->
u.
p.ub) {
1386 init = (pr->
u.
p.count)++;
1394 T while_limit = pr->
u.
p.parm3;
1396 int idx = (th->th.th_dispatch->th_disp_index - 1) %
1400 while ((!
status) && (while_limit != ++while_index)) {
1403 T victimId = pr->
u.
p.parm4;
1404 T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1406 &team->
t.t_dispatch[victimId].th_disp_buffer[idx]);
1409 oldVictimId != victimId) {
1410 victimId = (victimId + 1) % nproc;
1412 &team->
t.t_dispatch[victimId].th_disp_buffer[idx]);
1425 T small_chunk = 0, extras = 0, p_extra = 0;
1426 __kmp_initialize_self_buffer<T>(team,
id, pr, nchunks, nproc,
1427 init, small_chunk, extras,
1430 pr->
u.
p.count = init + 1;
1431 pr->
u.
p.ub = init + small_chunk + p_extra + (
id < extras ? 1 : 0);
1433 pr->
u.
p.parm4 = (
id + 1) % nproc;
1440 "stolen chunks from T#%%d, "
1441 "count:%%%s ub:%%%s\n",
1442 traits_t<UT>::spec, traits_t<T>::spec);
1443 KD_TRACE(10, (buff, gtid,
id, pr->
u.
p.count, pr->
u.
p.ub));
1448 if (pr->
u.
p.count < (UT)pr->
u.
p.ub)
1454 v->
u.
p.count >= (UT)v->
u.
p.ub) {
1455 pr->
u.
p.parm4 = (victimId + 1) % nproc;
1458 lckv = v->
u.
p.steal_lock;
1462 if (v->
u.
p.count >= limit) {
1464 pr->
u.
p.parm4 = (victimId + 1) % nproc;
1470 remaining = limit - v->
u.
p.count;
1471 if (remaining > 7) {
1474 init = (v->
u.
p.ub -= (remaining >> 2));
1478 init = (v->
u.
p.ub -= 1);
1486 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1487 "count:%%%s ub:%%%s\n",
1488 traits_t<UT>::spec, traits_t<UT>::spec);
1489 KD_TRACE(10, (buff, gtid, victimId, init, limit));
1494 pr->
u.
p.parm4 = victimId;
1498 pr->
u.
p.count = init + 1;
1502 if (init + 1 < limit)
1516 union_i4 vold, vnew;
1517 if (pr->
u.
p.count < (UT)pr->
u.
p.ub) {
1531 init = vold.p.count;
1532 status = (init < (UT)vold.p.ub);
1537 T while_limit = pr->
u.
p.parm3;
1539 int idx = (th->th.th_dispatch->th_disp_index - 1) %
1543 while ((!
status) && (while_limit != ++while_index)) {
1546 T victimId = pr->
u.
p.parm4;
1547 T oldVictimId = victimId ? victimId - 1 : nproc - 1;
1549 &team->
t.t_dispatch[victimId].th_disp_buffer[idx]);
1552 oldVictimId != victimId) {
1553 victimId = (victimId + 1) % nproc;
1555 &team->
t.t_dispatch[victimId].th_disp_buffer[idx]);
1568 T small_chunk = 0, extras = 0, p_extra = 0;
1569 __kmp_initialize_self_buffer<T>(team,
id, pr, nchunks, nproc,
1570 init, small_chunk, extras,
1572 vnew.p.count = init + 1;
1573 vnew.p.ub = init + small_chunk + p_extra + (
id < extras ? 1 : 0);
1580 pr->
u.
p.parm4 = (
id + 1) % nproc;
1587 "stolen chunks from T#%%d, "
1588 "count:%%%s ub:%%%s\n",
1589 traits_t<UT>::spec, traits_t<T>::spec);
1590 KD_TRACE(10, (buff, gtid,
id, pr->
u.
p.count, pr->
u.
p.ub));
1595 if (pr->
u.
p.count < (UT)pr->
u.
p.ub)
1604 vold.p.count >= (UT)vold.p.ub) {
1605 pr->
u.
p.parm4 = (victimId + 1) % nproc;
1609 remaining = vold.p.ub - vold.p.count;
1612 if (remaining > 7) {
1613 vnew.p.ub -= remaining >> 2;
1628 "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
1629 "count:%%%s ub:%%%s\n",
1630 traits_t<T>::spec, traits_t<T>::spec);
1631 KD_TRACE(10, (buff, gtid, victimId, vnew.p.ub, vold.p.ub));
1636 vold.p.ub - vnew.p.ub);
1638 pr->
u.
p.parm4 = victimId;
1641 vold.p.count = init + 1;
1648 if (vold.p.count < (UT)vold.p.ub)
1665 limit = chunk + init - 1;
1673 if ((last = (limit >= trip)) != 0)
1679 *p_lb = start + init;
1680 *p_ub = start + limit;
1682 *p_lb = start + init * incr;
1683 *p_ub = start + limit * incr;
1692 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",
1695 if ((
status = !pr->
u.
p.count) != 0) {
1699 last = (pr->
u.
p.parm1 != 0);
1703 pr->
u.
p.lb = pr->
u.
p.ub + pr->
u.
p.st;
1712 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d "
1713 "kmp_sch_static_[affinity|chunked] case\n",
1715 parm1 = pr->
u.
p.parm1;
1717 trip = pr->
u.
p.tc - 1;
1718 init = parm1 * (pr->
u.
p.count + tid);
1720 if ((
status = (init <= trip)) != 0) {
1723 limit = parm1 + init - 1;
1725 if ((last = (limit >= trip)) != 0)
1731 pr->
u.
p.count += nproc;
1734 *p_lb = start + init;
1735 *p_ub = start + limit;
1737 *p_lb = start + init * incr;
1738 *p_ub = start + limit * incr;
1742 pr->
u.
p.ordered_lower = init;
1743 pr->
u.
p.ordered_upper = limit;
1752 UT nchunks = pr->
u.
p.parm2;
1756 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",
1759 chunk_number = test_then_inc_acq<ST>((
volatile ST *)&sh->
u.
s.
iteration);
1760 status = (chunk_number < nchunks);
1768 trip = pr->
u.
p.tc - 1;
1781 *p_lb = start + init;
1782 *p_ub = start + limit;
1784 *p_lb = start + init * incr;
1785 *p_ub = start + limit * incr;
1789 pr->
u.
p.ordered_lower = init;
1790 pr->
u.
p.ordered_upper = limit;
1797 T chunkspec = pr->
u.
p.parm1;
1798 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
1806 remaining = trip - init;
1807 if (remaining <= 0) {
1818 remaining = trip - init;
1819 if (remaining <= 0) {
1824 if ((T)remaining > chunkspec) {
1825 limit = init + chunkspec - 1;
1828 limit = init + remaining - 1;
1833 limit = init + (UT)((
double)remaining *
1834 *(
double *)&pr->
u.
p.parm3);
1836 (
ST)init, (
ST)limit)) {
1848 *p_lb = start + init * incr;
1849 *p_ub = start + limit * incr;
1851 pr->
u.
p.ordered_lower = init;
1852 pr->
u.
p.ordered_upper = limit;
1866 T chunk = pr->
u.
p.parm1;
1868 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",
1875 remaining = trip - init;
1876 if (remaining <= 0) {
1882 if ((T)remaining < pr->u.p.parm2) {
1887 remaining = trip - init;
1888 if (remaining <= 0) {
1893 if ((T)remaining > chunk) {
1894 limit = init + chunk - 1;
1897 limit = init + remaining - 1;
1906 UT rem = span % chunk;
1908 span += chunk - rem;
1909 limit = init + span;
1911 (
ST)init, (
ST)limit)) {
1923 *p_lb = start + init * incr;
1924 *p_ub = start + limit * incr;
1926 pr->
u.
p.ordered_lower = init;
1927 pr->
u.
p.ordered_upper = limit;
1939 T chunkspec = pr->
u.
p.parm1;
1941#if KMP_USE_X87CONTROL
1944 unsigned int oldFpcw;
1945 unsigned int fpcwSet = 0;
1947 KD_TRACE(100, (
"__kmp_dispatch_next_algorithm: T#%d "
1948 "kmp_sch_guided_analytical_chunked case\n",
1958 chunkIdx = test_then_inc_acq<ST>((
volatile ST *)&sh->
u.
s.
iteration);
1959 if (chunkIdx >= (UT)pr->
u.
p.parm2) {
1962 init = chunkIdx * chunkspec + pr->
u.
p.count;
1965 if ((
status = (init > 0 && init <= trip)) != 0) {
1966 limit = init + chunkspec - 1;
1968 if ((last = (limit >= trip)) != 0)
1978#if KMP_USE_X87CONTROL
1983 oldFpcw = _control87(0, 0);
1984 _control87(_PC_64, _MCW_PC);
1989 init = __kmp_dispatch_guided_remaining<T>(
1990 trip, *(DBL *)&pr->
u.
p.parm3, chunkIdx);
1995 limit = trip - __kmp_dispatch_guided_remaining<T>(
1996 trip, *(DBL *)&pr->
u.
p.parm3, chunkIdx + 1);
2006#if KMP_USE_X87CONTROL
2010 if (fpcwSet && (oldFpcw & fpcwSet))
2011 _control87(oldFpcw, _MCW_PC);
2018 *p_lb = start + init * incr;
2019 *p_ub = start + limit * incr;
2021 pr->
u.
p.ordered_lower = init;
2022 pr->
u.
p.ordered_upper = limit;
2035 T parm2 = pr->
u.
p.parm2;
2036 T parm3 = pr->
u.
p.parm3;
2037 T parm4 = pr->
u.
p.parm4;
2039 (
"__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",
2042 index = test_then_inc<ST>((
volatile ST *)&sh->
u.
s.
iteration);
2044 init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
2045 trip = pr->
u.
p.tc - 1;
2047 if ((
status = ((T)index < parm3 && init <= trip)) == 0) {
2054 limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
2057 if ((last = (limit >= trip)) != 0)
2064 *p_lb = start + init;
2065 *p_ub = start + limit;
2067 *p_lb = start + init * incr;
2068 *p_ub = start + limit * incr;
2072 pr->
u.
p.ordered_lower = init;
2073 pr->
u.
p.ordered_upper = limit;
2093 "ordered_lower:%%%s ordered_upper:%%%s\n",
2094 traits_t<UT>::spec, traits_t<UT>::spec);
2095 KD_TRACE(1000, (buff, gtid, pr->
u.
p.ordered_lower, pr->
u.
p.ordered_upper));
2102 "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
2103 "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
2104 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2107 KD_TRACE(10, (buff, gtid,
status, *p_last, *p_lb, *p_ub, *p_st));
2117#if OMPT_SUPPORT && OMPT_OPTIONAL
2118#define OMPT_LOOP_END \
2119 if (status == 0) { \
2120 if (ompt_enabled.ompt_callback_work) { \
2121 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
2122 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
2123 ompt_callbacks.ompt_callback(ompt_callback_work)( \
2124 ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
2125 &(task_info->task_data), 0, codeptr); \
2128#define OMPT_LOOP_DISPATCH(lb, ub, st, status) \
2129 if (ompt_enabled.ompt_callback_dispatch && status) { \
2130 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
2131 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
2132 ompt_dispatch_chunk_t chunk; \
2133 ompt_data_t instance = ompt_data_none; \
2134 OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st); \
2135 instance.ptr = &chunk; \
2136 ompt_callbacks.ompt_callback(ompt_callback_dispatch)( \
2137 &(team_info->parallel_data), &(task_info->task_data), \
2138 ompt_dispatch_ws_loop_chunk, instance); \
2142#define OMPT_LOOP_END
2143#define OMPT_LOOP_DISPATCH(lb, ub, st, status)
2146#if KMP_STATS_ENABLED
2147#define KMP_STATS_LOOP_END \
2149 kmp_int64 u, l, t, i; \
2150 l = (kmp_int64)(*p_lb); \
2151 u = (kmp_int64)(*p_ub); \
2152 i = (kmp_int64)(pr->u.p.st); \
2153 if (status == 0) { \
2155 KMP_POP_PARTITIONED_TIMER(); \
2156 } else if (i == 1) { \
2161 } else if (i < 0) { \
2163 t = (l - u) / (-i) + 1; \
2168 t = (u - l) / i + 1; \
2172 KMP_COUNT_VALUE(OMP_loop_dynamic_iterations, t); \
2175#define KMP_STATS_LOOP_END
2178template <
typename T>
2181 typename traits_t<T>::signed_t *p_st
2182#
if OMPT_SUPPORT && OMPT_OPTIONAL
2188 typedef typename traits_t<T>::unsigned_t UT;
2189 typedef typename traits_t<T>::signed_t
ST;
2205 (
"__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",
2206 gtid, p_lb, p_ub, p_st, p_last));
2208 if (team->
t.t_serialized) {
2211 th->th.th_dispatch->th_disp_buffer);
2214 if ((
status = (pr->
u.
p.tc != 0)) == 0) {
2229 UT limit, trip, init;
2231 T chunk = pr->
u.
p.parm1;
2233 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
2236 init = chunk * pr->
u.
p.count++;
2237 trip = pr->
u.
p.tc - 1;
2239 if ((
status = (init <= trip)) == 0) {
2253 limit = chunk + init - 1;
2256 if ((last = (limit >= trip)) != 0) {
2259 pr->
u.
p.last_upper = pr->
u.
p.ub;
2267 *p_lb = start + init;
2268 *p_ub = start + limit;
2270 *p_lb = start + init * incr;
2271 *p_ub = start + limit * incr;
2275 pr->
u.
p.ordered_lower = init;
2276 pr->
u.
p.ordered_upper = limit;
2282 "ordered_lower:%%%s ordered_upper:%%%s\n",
2283 traits_t<UT>::spec, traits_t<UT>::spec);
2284 KD_TRACE(1000, (buff, gtid, pr->
u.
p.ordered_lower,
2285 pr->
u.
p.ordered_upper));
2296 pr->
u.
p.last_upper = *p_ub;
2308 "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
2309 "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
2310 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2311 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last,
2312 (p_last ? *p_last : 0),
status));
2316#if INCLUDE_SSC_MARKS
2317 SSC_MARK_DISPATCH_NEXT();
2328 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2331 th->th.th_dispatch->th_dispatch_pr_current);
2334 th->th.th_dispatch->th_dispatch_sh_current);
2337#if KMP_USE_HIER_SCHED
2339 status = sh->hier->next(
loc, gtid, pr, &last, p_lb, p_ub, p_st);
2342 status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
2343 p_st, th->th.th_team_nproc,
2344 th->th.th_info.ds.ds_tid);
2348 num_done = test_then_inc<ST>(&sh->
u.
s.
num_done);
2354 "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
2355 traits_t<ST>::spec);
2361#if KMP_USE_HIER_SCHED
2364 if (num_done == th->th.th_team_nproc - 1) {
2365#if KMP_STATIC_STEAL_ENABLED
2368 int idx = (th->th.th_dispatch->th_disp_index - 1) %
2371 for (
i = 0;
i < th->th.th_team_nproc; ++
i) {
2374 &team->
t.t_dispatch[
i].th_disp_buffer[idx]);
2377 if (traits_t<T>::type_size > 4) {
2383 buf->u.p.steal_lock = NULL;
2403 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d change buffer_index:%d\n",
2415 th->th.th_dispatch->th_deo_fcn = NULL;
2416 th->th.th_dispatch->th_dxo_fcn = NULL;
2417 th->th.th_dispatch->th_dispatch_sh_current = NULL;
2418 th->th.th_dispatch->th_dispatch_pr_current = NULL;
2422 pr->
u.
p.last_upper = pr->
u.
p.ub;
2425 if (p_last != NULL &&
status != 0)
2434 "__kmp_dispatch_next: T#%%d normal case: "
2435 "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
2436 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2437 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,
2438 (p_last ? *p_last : 0),
status));
2442#if INCLUDE_SSC_MARKS
2443 SSC_MARK_DISPATCH_NEXT();
2482 team = th->th.th_team;
2483 active = !team->
t.t_serialized;
2484 th->th.th_ident =
loc;
2487 KD_TRACE(10, (
"__kmpc_sections: called by T#%d\n", gtid));
2495 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2497 my_buffer_index = th->th.th_dispatch->th_disp_index++;
2502 KD_TRACE(10, (
"__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
2508 KD_TRACE(100, (
"__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
2509 "sh->buffer_index:%d\n",
2511 __kmp_wait<kmp_uint32>(&sh->
buffer_index, my_buffer_index,
2516 KD_TRACE(100, (
"__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
2517 "sh->buffer_index:%d\n",
2520 th->th.th_dispatch->th_dispatch_pr_current =
2522 th->th.th_dispatch->th_dispatch_sh_current =
2526#if OMPT_SUPPORT && OMPT_OPTIONAL
2531 ompt_work_sections, ompt_scope_begin, &(team_info->
parallel_data),
2560 KD_TRACE(1000, (
"__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid,
2569 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
2573 th->th.th_dispatch->th_dispatch_sh_current);
2577 bool moreSectionsToExecute =
true;
2581 if (sectionIndex >= numberOfSections) {
2582 moreSectionsToExecute =
false;
2587 if (!moreSectionsToExecute) {
2592 if (num_done == th->th.th_team_nproc - 1) {
2603 KD_TRACE(100, (
"__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,
2610 th->th.th_dispatch->th_deo_fcn = NULL;
2611 th->th.th_dispatch->th_dxo_fcn = NULL;
2612 th->th.th_dispatch->th_dispatch_sh_current = NULL;
2613 th->th.th_dispatch->th_dispatch_pr_current = NULL;
2615#if OMPT_SUPPORT && OMPT_OPTIONAL
2619 ompt_data_t
instance = ompt_data_none;
2628 return sectionIndex;
2642 int active = !th->th.th_team->t.t_serialized;
2644 KD_TRACE(100, (
"__kmpc_end_sections: T#%d called\n", gtid));
2648#if OMPT_SUPPORT && OMPT_OPTIONAL
2653 ompt_work_sections, ompt_scope_end, &(team_info->
parallel_data),
2660 KD_TRACE(100, (
"__kmpc_end_sections: T#%d returned\n", gtid));
2663template <
typename T>
2665 kmp_int32 *plastiter, T *plower, T *pupper,
2666 typename traits_t<T>::signed_t incr) {
2667 typedef typename traits_t<T>::unsigned_t UT;
2675 KE_TRACE(10, (
"__kmpc_dist_get_bounds called (%d)\n", gtid));
2677 typedef typename traits_t<T>::signed_t
ST;
2682 "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
2683 traits_t<T>::spec, traits_t<T>::spec,
2684 traits_t<ST>::spec, traits_t<T>::spec);
2685 KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
2695 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
2710 team = th->th.th_team;
2712 nteams = th->th.th_teams_size.nteams;
2713 team_id = team->
t.t_master_tid;
2718 trip_count = *pupper - *plower + 1;
2719 }
else if (incr == -1) {
2720 trip_count = *plower - *pupper + 1;
2721 }
else if (incr > 0) {
2723 trip_count = (UT)(*pupper - *plower) / incr + 1;
2725 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
2728 if (trip_count <= nteams) {
2734 if (team_id < trip_count) {
2735 *pupper = *plower = *plower + team_id * incr;
2737 *plower = *pupper + incr;
2739 if (plastiter != NULL)
2740 *plastiter = (team_id == trip_count - 1);
2743 UT chunk = trip_count / nteams;
2744 UT extras = trip_count % nteams;
2746 incr * (team_id * chunk + (team_id < extras ? team_id : extras));
2747 *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
2748 if (plastiter != NULL)
2749 *plastiter = (team_id == nteams - 1);
2752 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
2756 *plower += team_id * chunk_inc_count;
2757 *pupper = *plower + chunk_inc_count - incr;
2760 if (*pupper < *plower)
2761 *pupper = traits_t<T>::max_value;
2762 if (plastiter != NULL)
2763 *plastiter = *plower <= upper && *pupper > upper - incr;
2764 if (*pupper > upper)
2767 if (*pupper > *plower)
2768 *pupper = traits_t<T>::min_value;
2769 if (plastiter != NULL)
2770 *plastiter = *plower >= upper && *pupper < upper - incr;
2771 if (*pupper < upper)
2805#if OMPT_SUPPORT && OMPT_OPTIONAL
2806 OMPT_STORE_RETURN_ADDRESS(gtid);
2808 __kmp_dispatch_init<kmp_int32>(
loc, gtid, schedule, lb, ub, st, chunk,
true);
2817#if OMPT_SUPPORT && OMPT_OPTIONAL
2818 OMPT_STORE_RETURN_ADDRESS(gtid);
2820 __kmp_dispatch_init<kmp_uint32>(
loc, gtid, schedule, lb, ub, st, chunk,
true);
2830#if OMPT_SUPPORT && OMPT_OPTIONAL
2831 OMPT_STORE_RETURN_ADDRESS(gtid);
2833 __kmp_dispatch_init<kmp_int64>(
loc, gtid, schedule, lb, ub, st, chunk,
true);
2843#if OMPT_SUPPORT && OMPT_OPTIONAL
2844 OMPT_STORE_RETURN_ADDRESS(gtid);
2846 __kmp_dispatch_init<kmp_uint64>(
loc, gtid, schedule, lb, ub, st, chunk,
true);
2863#if OMPT_SUPPORT && OMPT_OPTIONAL
2864 OMPT_STORE_RETURN_ADDRESS(gtid);
2866 __kmp_dist_get_bounds<kmp_int32>(
loc, gtid, p_last, &lb, &ub, st);
2867 __kmp_dispatch_init<kmp_int32>(
loc, gtid, schedule, lb, ub, st, chunk,
true);
2875#if OMPT_SUPPORT && OMPT_OPTIONAL
2876 OMPT_STORE_RETURN_ADDRESS(gtid);
2878 __kmp_dist_get_bounds<kmp_uint32>(
loc, gtid, p_last, &lb, &ub, st);
2879 __kmp_dispatch_init<kmp_uint32>(
loc, gtid, schedule, lb, ub, st, chunk,
true);
2887#if OMPT_SUPPORT && OMPT_OPTIONAL
2888 OMPT_STORE_RETURN_ADDRESS(gtid);
2890 __kmp_dist_get_bounds<kmp_int64>(
loc, gtid, p_last, &lb, &ub, st);
2891 __kmp_dispatch_init<kmp_int64>(
loc, gtid, schedule, lb, ub, st, chunk,
true);
2899#if OMPT_SUPPORT && OMPT_OPTIONAL
2900 OMPT_STORE_RETURN_ADDRESS(gtid);
2902 __kmp_dist_get_bounds<kmp_uint64>(
loc, gtid, p_last, &lb, &ub, st);
2903 __kmp_dispatch_init<kmp_uint64>(
loc, gtid, schedule, lb, ub, st, chunk,
true);
2921#if OMPT_SUPPORT && OMPT_OPTIONAL
2922 OMPT_STORE_RETURN_ADDRESS(gtid);
2924 return __kmp_dispatch_next<kmp_int32>(
loc, gtid, p_last, p_lb, p_ub, p_st
2925#
if OMPT_SUPPORT && OMPT_OPTIONAL
2927 OMPT_LOAD_RETURN_ADDRESS(gtid)
2938#if OMPT_SUPPORT && OMPT_OPTIONAL
2939 OMPT_STORE_RETURN_ADDRESS(gtid);
2941 return __kmp_dispatch_next<kmp_uint32>(
loc, gtid, p_last, p_lb, p_ub, p_st
2942#
if OMPT_SUPPORT && OMPT_OPTIONAL
2944 OMPT_LOAD_RETURN_ADDRESS(gtid)
2954#if OMPT_SUPPORT && OMPT_OPTIONAL
2955 OMPT_STORE_RETURN_ADDRESS(gtid);
2957 return __kmp_dispatch_next<kmp_int64>(
loc, gtid, p_last, p_lb, p_ub, p_st
2958#
if OMPT_SUPPORT && OMPT_OPTIONAL
2960 OMPT_LOAD_RETURN_ADDRESS(gtid)
2971#if OMPT_SUPPORT && OMPT_OPTIONAL
2972 OMPT_STORE_RETURN_ADDRESS(gtid);
2974 return __kmp_dispatch_next<kmp_uint64>(
loc, gtid, p_last, p_lb, p_ub, p_st
2975#
if OMPT_SUPPORT && OMPT_OPTIONAL
2977 OMPT_LOAD_RETURN_ADDRESS(gtid)
2989 __kmp_dispatch_finish<kmp_uint32>(gtid,
loc);
2996 __kmp_dispatch_finish<kmp_uint64>(gtid,
loc);
3003 __kmp_dispatch_finish<kmp_uint32>(gtid,
loc);
3010 __kmp_dispatch_finish<kmp_uint64>(gtid,
loc);
3071 void *spin = spinner;
3081 while (!
f(spin,
check)) {
3092#ifdef KMP_GOMP_COMPAT
3098 __kmp_dispatch_init<kmp_int32>(
loc, gtid, schedule, lb, ub, st, chunk,
3106 __kmp_dispatch_init<kmp_uint32>(
loc, gtid, schedule, lb, ub, st, chunk,
3114 __kmp_dispatch_init<kmp_int64>(
loc, gtid, schedule, lb, ub, st, chunk,
3122 __kmp_dispatch_init<kmp_uint64>(
loc, gtid, schedule, lb, ub, st, chunk,
3127 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid,
loc);
3131 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid,
loc);
3135 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid,
loc);
3139 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid,
loc);
void * target(void *task)
void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk)
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st)
sched_type
Describes the loop schedule to be used for a parallel for loop.
void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk)
void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid)
kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid, kmp_int32 numberOfSections)
void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk)
void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk)
See __kmpc_dispatch_init_4.
int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st)
See __kmpc_dispatch_next_4.
void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid)
int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st)
See __kmpc_dispatch_next_4.
void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid)
See __kmpc_dispatch_fini_4.
kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid)
int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st)
See __kmpc_dispatch_next_4.
void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid)
See __kmpc_dispatch_fini_4.
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk)
void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk)
See __kmpc_dispatch_init_4.
void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk)
See __kmpc_dispatch_init_4.
void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid)
See __kmpc_dispatch_fini_4.
void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk)
See __kmpc_dispatch_init_4.
@ kmp_sch_runtime_simd
runtime with chunk adjustment
@ kmp_sch_static
static unspecialized
@ kmp_sch_guided_simd
guided with chunk adjustment
@ kmp_sch_guided_chunked
guided unspecialized
@ kmp_sch_dynamic_chunked
@ kmp_sch_guided_analytical_chunked
@ kmp_sch_static_balanced
@ kmp_sch_lower
lower bound for unordered values
@ kmp_nm_upper
upper bound for nomerge values
@ kmp_ord_lower
lower bound for ordered values, must be power of 2
@ kmp_sch_guided_iterative_chunked
@ kmp_sch_static_balanced_chunked
@ kmp_sch_upper
upper bound for unordered values
@ kmp_nm_lower
lower bound for nomerge values
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance * instance
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
#define KMP_DEFAULT_CHUNK
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time)
struct KMP_ALIGN_CACHE dispatch_private_info dispatch_private_info_t
#define SCHEDULE_MONOTONIC
#define SCHEDULE_HAS_MONOTONIC(s)
enum sched_type __kmp_auto
enum sched_type __kmp_static
int __kmp_force_monotonic
kmp_info_t ** __kmp_threads
int __kmp_dispatch_num_buffers
#define SCHEDULE_WITHOUT_MODIFIERS(s)
#define KMP_INIT_YIELD(count)
#define KMP_MASTER_GTID(gtid)
void __kmp_parallel_initialize(void)
#define KMP_INIT_BACKOFF(time)
volatile int __kmp_init_parallel
#define __kmp_allocate(size)
static bool __kmp_is_hybrid_cpu()
int __kmp_env_consistency_check
#define SCHEDULE_HAS_NONMONOTONIC(s)
static int __kmp_gtid_from_tid(int tid, const kmp_team_t *team)
enum sched_type __kmp_guided
void __kmp_resume_if_soft_paused()
static void __kmp_assert_valid_gtid(kmp_int32 gtid)
@ KMP_HW_CORE_TYPE_UNKNOWN
volatile int __kmp_init_serial
static void __kmp_type_convert(T1 src, T2 *dest)
union KMP_ALIGN_CACHE kmp_info kmp_info_t
#define SCHEDULE_NONMONOTONIC
KMP_ARCH_X86 KMP_ARCH_X86 long double
KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86<<, 2i, 1, KMP_ARCH_X86) ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, > KMP_ARCH_X86 KMP_ARCH_X86 kmp_uint32
#define KMP_BUILD_ASSERT(expr)
#define KMP_DEBUG_ASSERT(cond)
#define KMP_ASSERT2(cond, msg)
unsigned long long kmp_uint64
kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker)
#define GUIDED_ANALYTICAL_WORKAROUND
int __kmp_dispatch_next_algorithm(int gtid, dispatch_private_info_template< T > *pr, dispatch_shared_info_template< T > volatile *sh, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st, T nproc, T tid)
kmp_uint32 __kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker, kmp_uint32(*pred)(kmp_uint32, kmp_uint32), void *obj)
void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker, kmp_uint32(*pred)(void *, kmp_uint32), void *obj)
#define KMP_STATS_LOOP_END
static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st)
kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker)
void __kmp_initialize_self_buffer(kmp_team_t *team, T id, dispatch_private_info_template< T > *pr, typename traits_t< T >::unsigned_t nchunks, T nproc, typename traits_t< T >::unsigned_t &init, T &small_chunk, T &extras, T &p_extra)
void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref)
kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker)
static void __kmp_dispatch_finish(int gtid, ident_t *loc)
kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker)
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid, kmp_int32 *plastiter, T *plower, T *pupper, typename traits_t< T >::signed_t incr)
void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid, dispatch_private_info_template< T > *pr, enum sched_type schedule, T lb, T ub, typename traits_t< T >::signed_t st, typename traits_t< T >::signed_t chunk, T nproc, T tid)
kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker)
void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref)
#define OMPT_LOOP_DISPATCH(lb, ub, st, status)
static void __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb, T ub, typename traits_t< T >::signed_t st, typename traits_t< T >::signed_t chunk, int push_ws)
static int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule, bool use_hier=false)
static __inline traits_t< T >::unsigned_t __kmp_dispatch_guided_remaining(T tc, typename traits_t< T >::floating_t base, typename traits_t< T >::unsigned_t idx)
static const int guided_int_param
__forceinline kmp_int32 test_then_inc< kmp_int32 >(volatile kmp_int32 *p)
static const double guided_flt_param
kmp_hier_sched_env_t __kmp_hier_scheds
void __kmp_dispatch_free_hierarchies(kmp_team_t *team)
void __kmp_push_sync(int gtid, enum cons_type ct, ident_t const *ident, kmp_user_lock_p lck)
enum cons_type __kmp_pop_workshare(int gtid, enum cons_type ct, ident_t const *ident)
void __kmp_error_construct(kmp_i18n_id_t id, enum cons_type ct, ident_t const *ident)
void __kmp_pop_sync(int gtid, enum cons_type ct, ident_t const *ident)
static volatile kmp_i18n_cat_status_t status
void __kmp_fatal(kmp_msg_t message,...)
#define KMP_FSYNC_SPIN_ACQUIRED(obj)
#define KMP_FSYNC_SPIN_PREPARE(obj)
#define USE_ITT_BUILD_ARG(x)
#define KMP_FSYNC_SPIN_INIT(obj, spin)
static int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid)
static void __kmp_init_lock(kmp_lock_t *lck)
static void __kmp_destroy_lock(kmp_lock_t *lck)
static void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid)
#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv)
#define KMP_ATOMIC_ST_REL(p, v)
#define KMP_XCHG_FIXED64(p, v)
#define KMP_ATOMIC_LD_ACQ(p)
#define KMP_ATOMIC_ST_RLX(p, v)
#define KMP_ATOMIC_LD_RLX(p)
Functions for collecting statistics.
#define KMP_COUNT_VALUE(n, v)
#define KMP_PUSH_PARTITIONED_TIMER(name)
#define KMP_POP_PARTITIONED_TIMER()
#define KMP_COUNT_DEVELOPER_VALUE(n, v)
#define KMP_TIME_PARTITIONED_BLOCK(name)
#define KMP_COUNT_BLOCK(n)
char * __kmp_str_format(char const *format,...)
void __kmp_str_free(char **str)
ompt_callbacks_active_t ompt_enabled
ompt_callbacks_internal_t ompt_callbacks
#define OMPT_GET_RETURN_ADDRESS(level)
ompt_team_info_t * __ompt_get_teaminfo(int depth, int *size)
ompt_task_info_t * __ompt_get_task_info_object(int depth)
std::atomic< kmp_uint32 > steal_flag
union KMP_ALIGN_CACHE dispatch_private_info_template::private_info_tmpl u
kmp_uint32 ordered_bumped
volatile UT ordered_iteration
union dispatch_shared_info_template::shared_info_tmpl u
volatile kmp_uint32 buffer_index
kmp_hier_layer_e * layers
ompt_data_t parallel_data
dispatch_private_infoXX_template< T > p
dispatch_shared_infoXX_template< UT > s