1/*
2 * kmp_dispatch_hier.h -- hierarchical scheduling methods and data structures
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_DISPATCH_HIER_H
14#define KMP_DISPATCH_HIER_H
15#include "kmp.h"
16#include "kmp_dispatch.h"
17
18// Layer type for scheduling hierarchy
19enum kmp_hier_layer_e {
20  LAYER_THREAD = -1,
21  LAYER_L1 = 0,
22  LAYER_L2 = 1,
23  LAYER_L3 = 2,
24  LAYER_NUMA = 3,
25  LAYER_LOOP = 4,
26  LAYER_LAST = 5
27};
28
29// Convert hierarchy type (LAYER_L1, LAYER_L2, etc.) to C-style string
30static inline const char *__kmp_get_hier_str(kmp_hier_layer_e type) {
31  switch (type) {
32  case kmp_hier_layer_e::LAYER_THREAD:
33    return "THREAD";
34  case kmp_hier_layer_e::LAYER_L1:
35    return "L1";
36  case kmp_hier_layer_e::LAYER_L2:
37    return "L2";
38  case kmp_hier_layer_e::LAYER_L3:
39    return "L3";
40  case kmp_hier_layer_e::LAYER_NUMA:
41    return "NUMA";
42  case kmp_hier_layer_e::LAYER_LOOP:
43    return "WHOLE_LOOP";
44  case kmp_hier_layer_e::LAYER_LAST:
45    return "LAST";
46  }
47 KMP_ASSERT(0);
48 // Appease compilers, should never get here
49 return "ERROR";
50}
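// Illustrative note (annotation added to this listing, not part of the file):
// the string form is what diagnostics such as KMP_WARNING(HierSchedInvalid, ...)
// further below print for a layer, e.g.
//   __kmp_get_hier_str(kmp_hier_layer_e::LAYER_L2)   -> "L2"
//   __kmp_get_hier_str(kmp_hier_layer_e::LAYER_LOOP) -> "WHOLE_LOOP"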
51
52// Structure to store values parsed from OMP_SCHEDULE for scheduling hierarchy
53typedef struct kmp_hier_sched_env_t {
54  int size;
55  int capacity;
56  enum sched_type *scheds;
57  kmp_int32 *small_chunks;
58  kmp_int64 *large_chunks;
59  kmp_hier_layer_e *layers;
60  // Append a level of the hierarchy
61  void append(enum sched_type sched, kmp_int32 chunk, kmp_hier_layer_e layer) {
62    if (capacity == 0) {
63      scheds = (enum sched_type *)__kmp_allocate(sizeof(enum sched_type) *
64                                                 kmp_hier_layer_e::LAYER_LAST);
65      small_chunks = (kmp_int32 *)__kmp_allocate(sizeof(kmp_int32) *
66                                                 kmp_hier_layer_e::LAYER_LAST);
67      large_chunks = (kmp_int64 *)__kmp_allocate(sizeof(kmp_int64) *
68                                                 kmp_hier_layer_e::LAYER_LAST);
69      layers = (kmp_hier_layer_e *)__kmp_allocate(sizeof(kmp_hier_layer_e) *
70                                                  kmp_hier_layer_e::LAYER_LAST);
71      capacity = kmp_hier_layer_e::LAYER_LAST;
72    }
73 int current_size = size;
74 KMP_DEBUG_ASSERT(current_size < kmp_hier_layer_e::LAYER_LAST);
75 scheds[current_size] = sched;
76 layers[current_size] = layer;
77 small_chunks[current_size] = chunk;
78 large_chunks[current_size] = (kmp_int64)chunk;
79 size++;
80 }
81  // Sort the hierarchy using selection sort; size will always be small
82  // (less than LAYER_LAST), so an O(n log n) algorithm is not necessary
83 void sort() {
84 if (size <= 1)
85 return;
86 for (int i = 0; i < size; ++i) {
87 int switch_index = i;
88 for (int j = i + 1; j < size; ++j) {
89 if (layers[j] < layers[switch_index])
90 switch_index = j;
91 }
92 if (switch_index != i) {
93 kmp_hier_layer_e temp1 = layers[i];
94 enum sched_type temp2 = scheds[i];
95 kmp_int32 temp3 = small_chunks[i];
96 kmp_int64 temp4 = large_chunks[i];
97 layers[i] = layers[switch_index];
98 scheds[i] = scheds[switch_index];
99 small_chunks[i] = small_chunks[switch_index];
100 large_chunks[i] = large_chunks[switch_index];
101 layers[switch_index] = temp1;
102 scheds[switch_index] = temp2;
103 small_chunks[switch_index] = temp3;
104 large_chunks[switch_index] = temp4;
105 }
106 }
107 }
108 // Free all memory
109 void deallocate() {
110    if (capacity > 0) {
111      __kmp_free(scheds);
112      __kmp_free(layers);
113      __kmp_free(small_chunks);
114      __kmp_free(large_chunks);
115      scheds = NULL;
116 layers = NULL;
117 small_chunks = NULL;
118 large_chunks = NULL;
119 }
120 size = 0;
121 capacity = 0;
122  }
123} kmp_hier_sched_env_t;
124
125extern int __kmp_dispatch_hand_threading;
126extern kmp_hier_sched_env_t __kmp_hier_scheds;
127
128// Sizes of layer arrays bounded by max number of detected L1s, L2s, etc.
129extern int __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LAST + 1];
130extern int __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LAST + 1];
131
132extern int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type);
133extern int __kmp_dispatch_get_id(int gtid, kmp_hier_layer_e type);
134extern int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1,
135                                        kmp_hier_layer_e t2);
136extern void __kmp_dispatch_free_hierarchies(kmp_team_t *team);
137
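// Illustrative sketch (hypothetical values, not from the source): how a parsed
// hierarchical OMP_SCHEDULE might be recorded before the runtime consumes it.
// kmp_sch_dynamic_chunked and kmp_sch_static are sched_type values from kmp.h.
//   __kmp_hier_scheds.append(kmp_sch_dynamic_chunked, 4, kmp_hier_layer_e::LAYER_L1);
//   __kmp_hier_scheds.append(kmp_sch_static, 0, kmp_hier_layer_e::LAYER_NUMA);
//   __kmp_hier_scheds.sort(); // order entries from the smallest layer upward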
138template <typename T> struct kmp_hier_shared_bdata_t {
139 typedef typename traits_t<T>::signed_t ST;
140  volatile kmp_uint64 val[2];
141  kmp_int32 status[2];
142  T lb[2];
143  T ub[2];
144  ST st[2];
145  dispatch_shared_info_template<T> sh[2];
146  void zero() {
147 val[0] = val[1] = 0;
148 status[0] = status[1] = 0;
149 lb[0] = lb[1] = 0;
150 ub[0] = ub[1] = 0;
151 st[0] = st[1] = 0;
152 sh[0].u.s.iteration = sh[1].u.s.iteration = 0;
153 }
154 void set_next_hand_thread(T nlb, T nub, ST nst, kmp_int32 nstatus,
155 kmp_uint64 index) {
156 lb[1 - index] = nlb;
157 ub[1 - index] = nub;
158 st[1 - index] = nst;
159 status[1 - index] = nstatus;
160 }
161 void set_next(T nlb, T nub, ST nst, kmp_int32 nstatus, kmp_uint64 index) {
162 lb[1 - index] = nlb;
163 ub[1 - index] = nub;
164 st[1 - index] = nst;
165 status[1 - index] = nstatus;
166 sh[1 - index].u.s.iteration = 0;
167 }
168
169  kmp_int32 get_next_status(kmp_uint64 index) const {
170    return status[1 - index];
171 }
172 T get_next_lb(kmp_uint64 index) const { return lb[1 - index]; }
173 T get_next_ub(kmp_uint64 index) const { return ub[1 - index]; }
174 ST get_next_st(kmp_uint64 index) const { return st[1 - index]; }
175  dispatch_shared_info_template<T> volatile *get_next_sh(kmp_uint64 index) {
176    return &(sh[1 - index]);
177 }
178
179 kmp_int32 get_curr_status(kmp_uint64 index) const { return status[index]; }
180 T get_curr_lb(kmp_uint64 index) const { return lb[index]; }
181 T get_curr_ub(kmp_uint64 index) const { return ub[index]; }
182 ST get_curr_st(kmp_uint64 index) const { return st[index]; }
183  dispatch_shared_info_template<T> volatile *get_curr_sh(kmp_uint64 index) {
184    return &(sh[index]);
185 }
186};
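// Note on the [2] arrays (annotation): they implement double buffering. While
// threads consume the chunk selected by the current index, the unit's primary
// thread writes the following chunk into the other slot with set_next(..., index);
// once the unit's barrier flips each thread's private index, get_curr_*() on the
// new index returns exactly what set_next() stored. Sketch with assumed values:
//   bdata.set_next(/*nlb=*/0, /*nub=*/99, /*nst=*/1, /*nstatus=*/1, idx);
//   // ... barrier flips the private index ...
//   T lb = bdata.get_curr_lb(1 - idx); // == 0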
187
188/*
189 * In the barrier implementations, num_active is the number of threads that are
190 * attached to the kmp_hier_top_unit_t structure in the scheduling hierarchy.
191 * bdata is the shared barrier data that resides on the kmp_hier_top_unit_t
192 * structure. tdata is the thread private data that resides on the thread
193 * data structure.
194 *
195 * The reset_shared() method is used to initialize the barrier data on the
196 * kmp_hier_top_unit_t hierarchy structure.
197 *
198 * The reset_private() method is used to initialize the barrier data on the
199 * thread's private dispatch buffer structure.
200 *
201 * The barrier() method takes an id, which is that thread's id for the
202 * kmp_hier_top_unit_t structure, and implements the barrier. All threads wait
203 * inside barrier() until all fellow threads who are attached to that
204 * kmp_hier_top_unit_t structure have arrived.
205 */
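// Usage sketch (illustrative only; `unit` and `tdata` are assumed to be a
// kmp_hier_top_unit_t<T> and the calling thread's private barrier data; the
// selection between the core and counter implementations is made by the
// kmp_hier_top_unit_t methods further below):
//   core_barrier_impl<T>::reset_shared(num_active, &unit->hier_barrier); // once
//   core_barrier_impl<T>::reset_private(num_active, tdata);              // per thread
//   core_barrier_impl<T>::barrier(id, &unit->hier_barrier, tdata);       // per chunk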
206
207// Core barrier implementation
208// Can be used in a unit with 2 to 8 threads
209template <typename T> class core_barrier_impl {
210 static inline kmp_uint64 get_wait_val(int num_active) {
211 kmp_uint64 wait_val = 0LL;
212 switch (num_active) {
213 case 2:
214 wait_val = 0x0101LL;
215 break;
216 case 3:
217 wait_val = 0x010101LL;
218 break;
219 case 4:
220 wait_val = 0x01010101LL;
221 break;
222 case 5:
223 wait_val = 0x0101010101LL;
224 break;
225 case 6:
226 wait_val = 0x010101010101LL;
227 break;
228 case 7:
229 wait_val = 0x01010101010101LL;
230 break;
231 case 8:
232 wait_val = 0x0101010101010101LL;
233 break;
234 default:
235 // don't use the core_barrier_impl for more than 8 threads
236 KMP_ASSERT(0);
237 }
238 return wait_val;
239 }
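// Worked example (annotation): with num_active == 3 the wait value is 0x010101.
// In barrier() below each arriving thread stores 0x01 into its own byte of
// val[current_index], so the 64-bit word equals 0x010101 exactly when all three
// threads of the unit have arrived.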
240
241public:
242 static void reset_private(kmp_int32 num_active,
243 kmp_hier_private_bdata_t *tdata);
244  static void reset_shared(kmp_int32 num_active,
245                           kmp_hier_shared_bdata_t<T> *bdata);
246  static void barrier(kmp_int32 id, kmp_hier_shared_bdata_t<T> *bdata,
247 kmp_hier_private_bdata_t *tdata);
248};
249
250template <typename T>
251void core_barrier_impl<T>::reset_private(kmp_int32 num_active,
252                                         kmp_hier_private_bdata_t *tdata) {
253 tdata->num_active = num_active;
254 tdata->index = 0;
255 tdata->wait_val[0] = tdata->wait_val[1] = get_wait_val(num_active);
256}
257template <typename T>
258void core_barrier_impl<T>::reset_shared(kmp_int32 num_active,
259                                        kmp_hier_shared_bdata_t<T> *bdata) {
260  bdata->val[0] = bdata->val[1] = 0LL;
261 bdata->status[0] = bdata->status[1] = 0LL;
262}
263template <typename T>
264void core_barrier_impl<T>::barrier(kmp_int32 id,
265                                   kmp_hier_shared_bdata_t<T> *bdata,
266                                   kmp_hier_private_bdata_t *tdata) {
267 kmp_uint64 current_index = tdata->index;
268 kmp_uint64 next_index = 1 - current_index;
269 kmp_uint64 current_wait_value = tdata->wait_val[current_index];
270 kmp_uint64 next_wait_value =
271 (current_wait_value ? 0 : get_wait_val(tdata->num_active));
272 KD_TRACE(10, ("core_barrier_impl::barrier(): T#%d current_index:%llu "
273 "next_index:%llu curr_wait:%llu next_wait:%llu\n",
274 __kmp_get_gtid(), current_index, next_index, current_wait_value,
275 next_wait_value));
276 char v = (current_wait_value ? '\1' : '\0');
277 (RCAST(volatile char *, &(bdata->val[current_index])))[id] = v;
278 __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
279 __kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
280 tdata->wait_val[current_index] = next_wait_value;
281 tdata->index = next_index;
282}
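// Note (annotation): the wait value alternates between the full byte pattern and
// zero on successive uses of the same slot. On one pass each thread writes 0x01
// and waits for the full pattern; on the next reuse of that slot it writes 0x00
// and waits for zero, so val[] never needs an explicit reset between chunks.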
283
284// Counter barrier implementation
285// Can be used in a unit with an arbitrary number of active threads
286template <typename T> class counter_barrier_impl {
287public:
288 static void reset_private(kmp_int32 num_active,
289 kmp_hier_private_bdata_t *tdata);
290  static void reset_shared(kmp_int32 num_active,
291                           kmp_hier_shared_bdata_t<T> *bdata);
292  static void barrier(kmp_int32 id, kmp_hier_shared_bdata_t<T> *bdata,
293 kmp_hier_private_bdata_t *tdata);
294};
295
296template <typename T>
297void counter_barrier_impl<T>::reset_private(kmp_int32 num_active,
298                                            kmp_hier_private_bdata_t *tdata) {
299 tdata->num_active = num_active;
300 tdata->index = 0;
301 tdata->wait_val[0] = tdata->wait_val[1] = (kmp_uint64)num_active;
302}
303template <typename T>
304void counter_barrier_impl<T>::reset_shared(kmp_int32 num_active,
305                                           kmp_hier_shared_bdata_t<T> *bdata) {
306  bdata->val[0] = bdata->val[1] = 0LL;
307 bdata->status[0] = bdata->status[1] = 0LL;
308}
309template <typename T>
310void counter_barrier_impl<T>::barrier(kmp_int32 id,
311                                      kmp_hier_shared_bdata_t<T> *bdata,
312                                      kmp_hier_private_bdata_t *tdata) {
313 volatile kmp_int64 *val;
314 kmp_uint64 current_index = tdata->index;
315 kmp_uint64 next_index = 1 - current_index;
316 kmp_uint64 current_wait_value = tdata->wait_val[current_index];
317 kmp_uint64 next_wait_value = current_wait_value + tdata->num_active;
318
319 KD_TRACE(10, ("counter_barrier_impl::barrier(): T#%d current_index:%llu "
320 "next_index:%llu curr_wait:%llu next_wait:%llu\n",
321 __kmp_get_gtid(), current_index, next_index, current_wait_value,
322 next_wait_value));
323  val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index]));
324  KMP_TEST_THEN_INC64(val);
325  __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
326 __kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
327 tdata->wait_val[current_index] = next_wait_value;
328 tdata->index = next_index;
329}
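// Note (annotation): each thread atomically increments the shared counter in
// val[current_index] and waits (__kmp_ge) until it reaches the expected total.
// The expected value advances by num_active on every pass instead of the counter
// being reset, which avoids racing with threads still arriving at the old value.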
330
331// Data associated with topology unit within a layer
332// For example, one kmp_hier_top_unit_t corresponds to one L1 cache
333template <typename T> struct kmp_hier_top_unit_t {
334 typedef typename traits_t<T>::signed_t ST;
335 typedef typename traits_t<T>::unsigned_t UT;
336 kmp_int32 active; // number of topology units that communicate with this unit
337  // chunk information (lower/upper bound, stride, etc.)
338  dispatch_private_info_template<T> hier_pr;
339  kmp_hier_top_unit_t<T> *hier_parent; // pointer to parent unit
340 kmp_hier_shared_bdata_t<T> hier_barrier; // shared barrier data for this unit
341
342  kmp_int32 get_hier_id() const { return hier_pr.hier_id; }
343  void reset_shared_barrier() {
344    KMP_DEBUG_ASSERT(active > 0);
345    if (active == 1)
346      return;
347    hier_barrier.zero();
348    if (active >= 2 && active <= 8) {
349      core_barrier_impl<T>::reset_shared(active, &hier_barrier);
350    } else {
351      counter_barrier_impl<T>::reset_shared(active, &hier_barrier);
352    }
353  }
354  void reset_private_barrier(kmp_hier_private_bdata_t *tdata) {
355    KMP_DEBUG_ASSERT(tdata);
356    KMP_DEBUG_ASSERT(active > 0);
357    if (active == 1)
358      return;
359    if (active >= 2 && active <= 8) {
360      core_barrier_impl<T>::reset_private(active, tdata);
361    } else {
362      counter_barrier_impl<T>::reset_private(active, tdata);
363    }
364  }
365  void barrier(kmp_int32 id, kmp_hier_private_bdata_t *tdata) {
366    KMP_DEBUG_ASSERT(tdata);
367    KMP_DEBUG_ASSERT(active > 0);
368    KMP_DEBUG_ASSERT(id >= 0 && id < active);
369    if (active == 1) {
370      tdata->index = 1 - tdata->index;
371      return;
372    }
373    if (active >= 2 && active <= 8) {
374      core_barrier_impl<T>::barrier(id, &hier_barrier, tdata);
375    } else {
376      counter_barrier_impl<T>::barrier(id, &hier_barrier, tdata);
377    }
378  }
379
380  kmp_int32 get_next_status(kmp_uint64 index) const {
381    return hier_barrier.get_next_status(index);
382 }
383 T get_next_lb(kmp_uint64 index) const {
384 return hier_barrier.get_next_lb(index);
385 }
386 T get_next_ub(kmp_uint64 index) const {
387 return hier_barrier.get_next_ub(index);
388 }
389 ST get_next_st(kmp_uint64 index) const {
390 return hier_barrier.get_next_st(index);
391 }
392  dispatch_shared_info_template<T> volatile *get_next_sh(kmp_uint64 index) {
393    return hier_barrier.get_next_sh(index);
394 }
395
396  kmp_int32 get_curr_status(kmp_uint64 index) const {
397    return hier_barrier.get_curr_status(index);
398 }
399 T get_curr_lb(kmp_uint64 index) const {
400 return hier_barrier.get_curr_lb(index);
401 }
402 T get_curr_ub(kmp_uint64 index) const {
403 return hier_barrier.get_curr_ub(index);
404 }
405 ST get_curr_st(kmp_uint64 index) const {
406 return hier_barrier.get_curr_st(index);
407 }
408  dispatch_shared_info_template<T> volatile *get_curr_sh(kmp_uint64 index) {
409    return hier_barrier.get_curr_sh(index);
410 }
411
412  void set_next_hand_thread(T lb, T ub, ST st, kmp_int32 status,
413                            kmp_uint64 index) {
414 hier_barrier.set_next_hand_thread(lb, ub, st, status, index);
415 }
416 void set_next(T lb, T ub, ST st, kmp_int32 status, kmp_uint64 index) {
417 hier_barrier.set_next(lb, ub, st, status, index);
418 }
419  dispatch_private_info_template<T> *get_my_pr() { return &hier_pr; }
420  kmp_hier_top_unit_t<T> *get_parent() { return hier_parent; }
421  dispatch_private_info_template<T> *get_parent_pr() {
422    return &(hier_parent->hier_pr);
423 }
424
425 kmp_int32 is_active() const { return active; }
426 kmp_int32 get_num_active() const { return active; }
427#ifdef KMP_DEBUG
428 void print() {
429 KD_TRACE(
430 10,
431 (" kmp_hier_top_unit_t: active:%d pr:%p lb:%d ub:%d st:%d tc:%d\n",
432 active, &hier_pr, hier_pr.u.p.lb, hier_pr.u.p.ub, hier_pr.u.p.st,
433 hier_pr.u.p.tc));
434 }
435#endif
436};
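// Note (annotation): `active` counts the threads or child units attached to this
// unit, so it is what gets passed as nproc to the scheduling algorithm that
// divides this unit's iteration range among its children. Even when active == 1,
// barrier() still flips tdata->index so the double-buffered set_next()/get_curr_*()
// protocol stays in step.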
437
438// Information regarding a single layer within the scheduling hierarchy
439template <typename T> struct kmp_hier_layer_info_t {
440 int num_active; // number of threads active in this level
441 kmp_hier_layer_e type; // LAYER_L1, LAYER_L2, etc.
442 enum sched_type sched; // static, dynamic, guided, etc.
443 typename traits_t<T>::signed_t chunk; // chunk size associated with schedule
444 int length; // length of the kmp_hier_top_unit_t array
445
446#ifdef KMP_DEBUG
447 // Print this layer's information
448 void print() {
449 const char *t = __kmp_get_hier_str(type);
450 KD_TRACE(
451 10,
452 (" kmp_hier_layer_info_t: num_active:%d type:%s sched:%d chunk:%d "
453 "length:%d\n",
454     num_active, t, (int)sched, (int)chunk, length));
455  }
456#endif
457};
458
459/*
460 * Structure to implement entire hierarchy
461 *
462 * The hierarchy is kept as an array of arrays to represent the different
463 * layers. Layers range from layer 0, the lowest, up to layer num_layers - 1,
464 * the highest.
465 * Example:
466 * [ 2 ] -> [ L3 | L3 ]
467 * [ 1 ] -> [ L2 | L2 | L2 | L2 ]
468 * [ 0 ] -> [ L1 | L1 | L1 | L1 | L1 | L1 | L1 | L1 ]
469 * There is also an array of layer_info_t which has information regarding
470 * each layer
471 */
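// For the example above (annotation): num_layers == 3, layer 0 holds the L1
// units, layer 1 the L2 units, and layer 2 the L3 units; each unit's hier_parent
// points into the layer above it, and the top layer's units have no parent.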
472template <typename T> struct kmp_hier_t {
473public:
474 typedef typename traits_t<T>::unsigned_t UT;
475 typedef typename traits_t<T>::signed_t ST;
476
477private:
478 int next_recurse(ident_t *loc, int gtid, kmp_hier_top_unit_t<T> *current,
479 kmp_int32 *p_last, T *p_lb, T *p_ub, ST *p_st,
480 kmp_int32 previous_id, int hier_level) {
481 int status;
482 kmp_info_t *th = __kmp_threads[gtid];
483 auto parent = current->get_parent();
484 bool last_layer = (hier_level == get_num_layers() - 1);
486 kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[hier_level]);
487 KMP_DEBUG_ASSERT(current);
488 KMP_DEBUG_ASSERT(hier_level >= 0);
489 KMP_DEBUG_ASSERT(hier_level < get_num_layers());
490 KMP_DEBUG_ASSERT(tdata);
491 KMP_DEBUG_ASSERT(parent || last_layer);
492
493 KD_TRACE(
494 1, ("kmp_hier_t.next_recurse(): T#%d (%d) called\n", gtid, hier_level));
495
496 T hier_id = (T)current->get_hier_id();
497 // Attempt to grab next iteration range for this level
498 if (previous_id == 0) {
499 KD_TRACE(1, ("kmp_hier_t.next_recurse(): T#%d (%d) is primary of unit\n",
500 gtid, hier_level));
501 kmp_int32 contains_last;
502 T my_lb, my_ub;
503 ST my_st;
504 T nproc;
505 dispatch_shared_info_template<T> volatile *my_sh;
506      dispatch_private_info_template<T> *my_pr;
507      if (last_layer) {
508 // last layer below the very top uses the single shared buffer
509 // from the team struct.
510 KD_TRACE(10,
511 ("kmp_hier_t.next_recurse(): T#%d (%d) using top level sh\n",
512 gtid, hier_level));
513 my_sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
514 th->th.th_dispatch->th_dispatch_sh_current);
515 nproc = (T)get_top_level_nproc();
516 } else {
517 // middle layers use the shared buffer inside the kmp_hier_top_unit_t
518 // structure
519 KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) using hier sh\n",
520 gtid, hier_level));
521 my_sh =
522 parent->get_curr_sh(th->th.th_hier_bar_data[hier_level + 1].index);
523 nproc = (T)parent->get_num_active();
524 }
525 my_pr = current->get_my_pr();
526 KMP_DEBUG_ASSERT(my_sh);
527 KMP_DEBUG_ASSERT(my_pr);
528 enum sched_type schedule = get_sched(hier_level);
529 ST chunk = (ST)get_chunk(hier_level);
530 status = __kmp_dispatch_next_algorithm<T>(gtid, my_pr, my_sh,
531 &contains_last, &my_lb, &my_ub,
532 &my_st, nproc, hier_id);
533 KD_TRACE(
534 10,
535 ("kmp_hier_t.next_recurse(): T#%d (%d) next_pr_sh() returned %d\n",
536 gtid, hier_level, status));
537 // When no iterations are found (status == 0) and this is not the last
538 // layer, attempt to go up the hierarchy for more iterations
539 if (status == 0 && !last_layer) {
540 kmp_int32 hid;
541 __kmp_type_convert(hier_id, &hid);
542 status = next_recurse(loc, gtid, parent, &contains_last, &my_lb, &my_ub,
543 &my_st, hid, hier_level + 1);
544 KD_TRACE(
545 10,
546 ("kmp_hier_t.next_recurse(): T#%d (%d) hier_next() returned %d\n",
547 gtid, hier_level, status));
548 if (status == 1) {
549 kmp_hier_private_bdata_t *upper_tdata =
550 &(th->th.th_hier_bar_data[hier_level + 1]);
551 my_sh = parent->get_curr_sh(upper_tdata->index);
552 KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) about to init\n",
553 gtid, hier_level));
554 __kmp_dispatch_init_algorithm(loc, gtid, my_pr, schedule,
555 parent->get_curr_lb(upper_tdata->index),
556 parent->get_curr_ub(upper_tdata->index),
557 parent->get_curr_st(upper_tdata->index),
558#if USE_ITT_BUILD
559 NULL,
560#endif
561 chunk, nproc, hier_id);
562 status = __kmp_dispatch_next_algorithm<T>(
563 gtid, my_pr, my_sh, &contains_last, &my_lb, &my_ub, &my_st, nproc,
564 hier_id);
565 if (!status) {
566 KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) status not 1 "
567 "setting to 2!\n",
568 gtid, hier_level));
569 status = 2;
570 }
571 }
572 }
573 current->set_next(my_lb, my_ub, my_st, status, tdata->index);
574 // Propagate whether a unit holds the actual global last iteration
575 // The contains_last attribute is sent downwards from the top to the
576 // bottom of the hierarchy via the contains_last flag inside the
577 // private dispatch buffers in the hierarchy's middle layers
578 if (contains_last) {
579 // If the next_algorithm() method returns 1 for p_last and it is the
580 // last layer or our parent contains the last serial chunk, then the
581 // chunk must contain the last serial iteration.
582 if (last_layer || parent->hier_pr.flags.contains_last) {
583 KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) Setting this pr "
584 "to contain last.\n",
585 gtid, hier_level));
586 current->hier_pr.flags.contains_last = contains_last;
587 }
588 if (!current->hier_pr.flags.contains_last)
589 contains_last = FALSE;
590 }
591 if (p_last)
592 *p_last = contains_last;
593 } // if primary thread of this unit
594 if (hier_level > 0 || !__kmp_dispatch_hand_threading) {
595 KD_TRACE(10,
596 ("kmp_hier_t.next_recurse(): T#%d (%d) going into barrier.\n",
597 gtid, hier_level));
598 current->barrier(previous_id, tdata);
599 KD_TRACE(10,
600 ("kmp_hier_t.next_recurse(): T#%d (%d) released and exit %d\n",
601 gtid, hier_level, current->get_curr_status(tdata->index)));
602 } else {
603 KMP_DEBUG_ASSERT(previous_id == 0);
604 return status;
605 }
606 return current->get_curr_status(tdata->index);
607 }
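// Status values returned by next_recurse() (annotation, inferred from the code
// above): 0 means this level and everything above it is exhausted, 1 means a
// fresh iteration range was obtained and installed, and 2 means the parent
// produced a range that was immediately exhausted, so the caller should try
// again rather than terminate.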
608
609public:
610  int top_level_nproc;
611  int num_layers;
612  bool valid;
613  int type_size;
614  kmp_hier_layer_info_t<T> *info;
615  kmp_hier_top_unit_t<T> **layers;
616  // Deallocate all memory from this hierarchy
617 void deallocate() {
618 for (int i = 0; i < num_layers; ++i)
619      if (layers[i] != NULL) {
620        __kmp_free(layers[i]);
621      }
622    if (layers != NULL) {
623      __kmp_free(layers);
624      layers = NULL;
625    }
626    if (info != NULL) {
627      __kmp_free(info);
628      info = NULL;
629 }
630 num_layers = 0;
631 valid = false;
632 }
633 // Returns true if reallocation is needed else false
634 bool need_to_reallocate(int n, const kmp_hier_layer_e *new_layers,
635 const enum sched_type *new_scheds,
636 const ST *new_chunks) const {
637 if (!valid || layers == NULL || info == NULL ||
638 traits_t<T>::type_size != type_size || n != num_layers)
639 return true;
640 for (int i = 0; i < n; ++i) {
641 if (info[i].type != new_layers[i])
642 return true;
643 if (info[i].sched != new_scheds[i])
644 return true;
645 if (info[i].chunk != new_chunks[i])
646 return true;
647 }
648 return false;
649 }
650  // A single thread should call this function while the other threads wait.
651  // It creates a new scheduling hierarchy consisting of new_layers, new_scheds,
652  // and new_chunks, which should come pre-sorted according to
653  // kmp_hier_layer_e value. This function will try to avoid reallocation
654  // if it can.
655 void allocate_hier(int n, const kmp_hier_layer_e *new_layers,
656 const enum sched_type *new_scheds, const ST *new_chunks) {
657 top_level_nproc = 0;
658 if (!need_to_reallocate(n, new_layers, new_scheds, new_chunks)) {
659 KD_TRACE(
660 10,
661 ("kmp_hier_t<T>::allocate_hier: T#0 do not need to reallocate\n"));
662 for (int i = 0; i < n; ++i) {
663 info[i].num_active = 0;
664 for (int j = 0; j < get_length(i); ++j)
665 layers[i][j].active = 0;
666 }
667 return;
668 }
669 KD_TRACE(10, ("kmp_hier_t<T>::allocate_hier: T#0 full alloc\n"));
670 deallocate();
671 type_size = traits_t<T>::type_size;
672    num_layers = n;
673    info = (kmp_hier_layer_info_t<T> *)__kmp_allocate(
674        sizeof(kmp_hier_layer_info_t<T>) * n);
675    layers = (kmp_hier_top_unit_t<T> **)__kmp_allocate(
676        sizeof(kmp_hier_top_unit_t<T> *) * n);
677 for (int i = 0; i < n; ++i) {
678 int max = 0;
679 kmp_hier_layer_e layer = new_layers[i];
680 info[i].num_active = 0;
681 info[i].type = layer;
682 info[i].sched = new_scheds[i];
683 info[i].chunk = new_chunks[i];
684 max = __kmp_hier_max_units[layer + 1];
685 if (max == 0) {
686 valid = false;
687 KMP_WARNING(HierSchedInvalid, __kmp_get_hier_str(layer));
688 deallocate();
689 return;
690 }
691 info[i].length = max;
692      layers[i] = (kmp_hier_top_unit_t<T> *)__kmp_allocate(
693          sizeof(kmp_hier_top_unit_t<T>) * max);
694 for (int j = 0; j < max; ++j) {
695 layers[i][j].active = 0;
696 layers[i][j].hier_pr.flags.use_hier = TRUE;
697 }
698 }
699 valid = true;
700 }
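// Note (annotation): when need_to_reallocate() reports that the existing
// hierarchy already matches the requested layers, schedules, and chunks,
// allocate_hier() only zeroes the per-layer and per-unit active counts so that
// threads can re-register for the new loop without any memory churn.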
701 // loc - source file location
702 // gtid - global thread identifier
703 // pr - this thread's private dispatch buffer (corresponding with gtid)
704  // p_last (return value) - pointer to flag indicating whether this set of
705  //                         iterations contains the last iteration
707 // p_lb (return value) - lower bound for this chunk of iterations
708 // p_ub (return value) - upper bound for this chunk of iterations
709 // p_st (return value) - stride for this chunk of iterations
710 //
711 // Returns 1 if there are more iterations to perform, 0 otherwise
712  int next(ident_t *loc, int gtid, dispatch_private_info_template<T> *pr,
713           kmp_int32 *p_last, T *p_lb, T *p_ub, ST *p_st) {
714 int status;
715 kmp_int32 contains_last = 0;
716 kmp_info_t *th = __kmp_threads[gtid];
717 kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[0]);
718 auto parent = pr->get_parent();
721 KMP_DEBUG_ASSERT(tdata);
723 T nproc = (T)parent->get_num_active();
724 T unit_id = (T)pr->get_hier_id();
725 KD_TRACE(
726 10,
727 ("kmp_hier_t.next(): T#%d THREAD LEVEL nproc:%d unit_id:%d called\n",
728 gtid, nproc, unit_id));
729 // Handthreading implementation
730 // Each iteration is performed by all threads on last unit (typically
731 // cores/tiles)
732 // e.g., threads 0,1,2,3 all execute iteration 0
733 // threads 0,1,2,3 all execute iteration 1
734 // threads 4,5,6,7 all execute iteration 2
735 // threads 4,5,6,7 all execute iteration 3
736 // ... etc.
737    if (__kmp_dispatch_hand_threading) {
738      KD_TRACE(10,
739 ("kmp_hier_t.next(): T#%d THREAD LEVEL using hand threading\n",
740 gtid));
741 if (unit_id == 0) {
742 // For hand threading, the sh buffer on the lowest level is only ever
743 // modified and read by the primary thread on that level. Because of
744 // this, we can always use the first sh buffer.
745 auto sh = &(parent->hier_barrier.sh[0]);
747 status = __kmp_dispatch_next_algorithm<T>(
748 gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
749 if (!status) {
750 bool done = false;
751 while (!done) {
752 done = true;
753 kmp_int32 uid;
754 __kmp_type_convert(unit_id, &uid);
755 status = next_recurse(loc, gtid, parent, &contains_last, p_lb, p_ub,
756 p_st, uid, 0);
757 if (status == 1) {
758            __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule,
759                                          parent->get_next_lb(tdata->index),
760 parent->get_next_ub(tdata->index),
761 parent->get_next_st(tdata->index),
762#if USE_ITT_BUILD
763 NULL,
764#endif
765 pr->u.p.parm1, nproc, unit_id);
766 sh->u.s.iteration = 0;
767 status = __kmp_dispatch_next_algorithm<T>(
768 gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc,
769 unit_id);
770 if (!status) {
771 KD_TRACE(10,
772 ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 0 "
773 "after next_pr_sh()"
774 "trying again.\n",
775 gtid));
776 done = false;
777 }
778 } else if (status == 2) {
779 KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 2 "
780 "trying again.\n",
781 gtid));
782 done = false;
783 }
784 }
785 }
786 parent->set_next_hand_thread(*p_lb, *p_ub, *p_st, status, tdata->index);
787 } // if primary thread of lowest unit level
788 parent->barrier(pr->get_hier_id(), tdata);
789 if (unit_id != 0) {
790 *p_lb = parent->get_curr_lb(tdata->index);
791 *p_ub = parent->get_curr_ub(tdata->index);
792 *p_st = parent->get_curr_st(tdata->index);
793 status = parent->get_curr_status(tdata->index);
794 }
795 } else {
796 // Normal implementation
797 // Each thread grabs an iteration chunk and executes it (no cooperation)
798 auto sh = parent->get_curr_sh(tdata->index);
800 status = __kmp_dispatch_next_algorithm<T>(
801 gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
802 KD_TRACE(10,
803 ("kmp_hier_t.next(): T#%d THREAD LEVEL next_algorithm status:%d "
804 "contains_last:%d p_lb:%d p_ub:%d p_st:%d\n",
805 gtid, status, contains_last, *p_lb, *p_ub, *p_st));
806 if (!status) {
807 bool done = false;
808 while (!done) {
809 done = true;
810 kmp_int32 uid;
811 __kmp_type_convert(unit_id, &uid);
812 status = next_recurse(loc, gtid, parent, &contains_last, p_lb, p_ub,
813 p_st, uid, 0);
814 if (status == 1) {
815 sh = parent->get_curr_sh(tdata->index);
816            __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule,
817                                          parent->get_curr_lb(tdata->index),
818 parent->get_curr_ub(tdata->index),
819 parent->get_curr_st(tdata->index),
820#if USE_ITT_BUILD
821 NULL,
822#endif
823 pr->u.p.parm1, nproc, unit_id);
824 status = __kmp_dispatch_next_algorithm<T>(
825 gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
826 if (!status) {
827 KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 0 "
828 "after next_pr_sh()"
829 "trying again.\n",
830 gtid));
831 done = false;
832 }
833 } else if (status == 2) {
834 KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 2 "
835 "trying again.\n",
836 gtid));
837 done = false;
838 }
839 }
840 }
841 }
842 if (contains_last && !parent->hier_pr.flags.contains_last) {
843 KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL resetting "
844 "contains_last to FALSE\n",
845 gtid));
846 contains_last = FALSE;
847 }
848 if (p_last)
849 *p_last = contains_last;
850 KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL exit status %d\n", gtid,
851 status));
852 return status;
853 }
854 // These functions probe the layer info structure
855 // Returns the type of topology unit given level
856  kmp_hier_layer_e get_type(int level) const {
857    KMP_DEBUG_ASSERT(level >= 0);
858    KMP_DEBUG_ASSERT(level < num_layers);
859    return info[level].type;
860 }
861 // Returns the schedule type at given level
862 enum sched_type get_sched(int level) const {
863    KMP_DEBUG_ASSERT(level >= 0);
864    KMP_DEBUG_ASSERT(level < num_layers);
865    return info[level].sched;
866 }
867 // Returns the chunk size at given level
868 ST get_chunk(int level) const {
869    KMP_DEBUG_ASSERT(level >= 0);
870    KMP_DEBUG_ASSERT(level < num_layers);
871    return info[level].chunk;
872 }
873 // Returns the number of active threads at given level
874 int get_num_active(int level) const {
875    KMP_DEBUG_ASSERT(level >= 0);
876    KMP_DEBUG_ASSERT(level < num_layers);
877    return info[level].num_active;
878 }
879 // Returns the length of topology unit array at given level
880 int get_length(int level) const {
881    KMP_DEBUG_ASSERT(level >= 0);
882    KMP_DEBUG_ASSERT(level < num_layers);
883    return info[level].length;
884 }
885 // Returns the topology unit given the level and index
886  kmp_hier_top_unit_t<T> *get_unit(int level, int index) {
887    KMP_DEBUG_ASSERT(level >= 0);
888    KMP_DEBUG_ASSERT(level < num_layers);
889    KMP_DEBUG_ASSERT(index >= 0);
890    KMP_DEBUG_ASSERT(index < get_length(level));
891    return &(layers[level][index]);
892 }
893 // Returns the number of layers in the hierarchy
894 int get_num_layers() const { return num_layers; }
895 // Returns the number of threads in the top layer
896 // This is necessary because we don't store a topology unit as
897 // the very top level and the scheduling algorithms need this information
898 int get_top_level_nproc() const { return top_level_nproc; }
899 // Return whether this hierarchy is valid or not
900 bool is_valid() const { return valid; }
901#ifdef KMP_DEBUG
902 // Print the hierarchy
903 void print() {
904 KD_TRACE(10, ("kmp_hier_t:\n"));
905 for (int i = num_layers - 1; i >= 0; --i) {
906 KD_TRACE(10, ("Info[%d] = ", i));
907 info[i].print();
908 }
909 for (int i = num_layers - 1; i >= 0; --i) {
910 KD_TRACE(10, ("Layer[%d] =\n", i));
911 for (int j = 0; j < info[i].length; ++j) {
912 layers[i][j].print();
913 }
914 }
915 }
916#endif
917};
918
919template <typename T>
920void __kmp_dispatch_init_hierarchy(ident_t *loc, int n,
921                                   kmp_hier_layer_e *new_layers,
922 enum sched_type *new_scheds,
923 typename traits_t<T>::signed_t *new_chunks,
924 T lb, T ub,
925 typename traits_t<T>::signed_t st) {
926 int tid, gtid, num_hw_threads, num_threads_per_layer1, active;
927 unsigned int my_buffer_index;
928 kmp_info_t *th;
929  kmp_team_t *team;
930  dispatch_private_info_template<T> *pr;
931  dispatch_shared_info_template<T> volatile *sh;
932  gtid = __kmp_entry_gtid();
933 tid = __kmp_tid_from_gtid(gtid);
934#ifdef KMP_DEBUG
935 KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d called: %d layer(s)\n",
936 gtid, n));
937 for (int i = 0; i < n; ++i) {
938 const char *layer = __kmp_get_hier_str(new_layers[i]);
939 KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d: new_layers[%d] = %s, "
940 "new_scheds[%d] = %d, new_chunks[%d] = %u\n",
941 gtid, i, layer, i, (int)new_scheds[i], i, new_chunks[i]));
942 }
943#endif // KMP_DEBUG
944 KMP_DEBUG_ASSERT(n > 0);
945 KMP_DEBUG_ASSERT(new_layers);
946 KMP_DEBUG_ASSERT(new_scheds);
947  KMP_DEBUG_ASSERT(new_chunks);
948  if (!TCR_4(__kmp_init_parallel))
949    __kmp_parallel_initialize();
950  __kmp_resume_if_soft_paused();
951
952 th = __kmp_threads[gtid];
953 team = th->th.th_team;
954 active = !team->t.t_serialized;
955  th->th.th_ident = loc;
956  num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
957  KMP_DEBUG_ASSERT(th->th.th_dispatch ==
958 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
959 my_buffer_index = th->th.th_dispatch->th_disp_index;
960 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
961 &th->th.th_dispatch
962 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
963 sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
964 &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
965 if (!active) {
966 KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d not active parallel. "
967 "Using normal dispatch functions.\n",
968 gtid));
970 pr->flags.use_hier = FALSE;
972 return;
973 }
976 pr->flags.use_hier = TRUE;
977 pr->u.p.tc = 0;
978 // Have primary thread allocate the hierarchy
979 if (__kmp_tid_from_gtid(gtid) == 0) {
980 KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d pr:%p sh:%p allocating "
981 "hierarchy\n",
982 gtid, pr, sh));
983 if (sh->hier == NULL) {
984 sh->hier = (kmp_hier_t<T> *)__kmp_allocate(sizeof(kmp_hier_t<T>));
985 }
986 sh->hier->allocate_hier(n, new_layers, new_scheds, new_chunks);
987 sh->u.s.iteration = 0;
988 }
989 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
990 // Check to make sure the hierarchy is valid
991 kmp_hier_t<T> *hier = sh->hier;
992 if (!sh->hier->is_valid()) {
993 pr->flags.use_hier = FALSE;
994 return;
995 }
996 // Have threads allocate their thread-private barrier data if it hasn't
997 // already been allocated
998 if (th->th.th_hier_bar_data == NULL) {
999 th->th.th_hier_bar_data = (kmp_hier_private_bdata_t *)__kmp_allocate(
1000 sizeof(kmp_hier_private_bdata_t) * kmp_hier_layer_e::LAYER_LAST);
1001 }
1002 // Have threads "register" themselves by modifying the active count for each
1003 // level they are involved in. The active count will act as nthreads for that
1004 // level regarding the scheduling algorithms
1005 for (int i = 0; i < n; ++i) {
1006 int index = __kmp_dispatch_get_index(tid, hier->get_type(i));
1007 kmp_hier_top_unit_t<T> *my_unit = hier->get_unit(i, index);
1008 // Setup the thread's private dispatch buffer's hierarchy pointers
1009 if (i == 0)
1010 pr->hier_parent = my_unit;
1011 // If this unit is already active, then increment active count and wait
1012 if (my_unit->is_active()) {
1013 KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d my_unit (%p) "
1014 "is already active (%d)\n",
1015 gtid, my_unit, my_unit->active));
1016 KMP_TEST_THEN_INC32(&(my_unit->active));
1017 break;
1018 }
1019 // Flag that this unit is active
1020 if (KMP_COMPARE_AND_STORE_ACQ32(&(my_unit->active), 0, 1)) {
1021 // Do not setup parent pointer for top level unit since it has no parent
1022 if (i < n - 1) {
1023 // Setup middle layer pointers to parents
1024        my_unit->get_my_pr()->hier_id =
1025            index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i),
1026                                                 hier->get_type(i + 1));
1027 int parent_index = __kmp_dispatch_get_index(tid, hier->get_type(i + 1));
1028 my_unit->hier_parent = hier->get_unit(i + 1, parent_index);
1029 } else {
1030 // Setup top layer information (no parent pointers are set)
1031        my_unit->get_my_pr()->hier_id =
1032            index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i),
1033                                                 kmp_hier_layer_e::LAYER_LOOP);
1034        KMP_TEST_THEN_INC32(&(hier->top_level_nproc));
1035        my_unit->hier_parent = nullptr;
1036 }
1037 // Set trip count to 0 so that next() operation will initially climb up
1038 // the hierarchy to get more iterations (early exit in next() for tc == 0)
1039 my_unit->get_my_pr()->u.p.tc = 0;
1040 // Increment this layer's number of active units
1041 KMP_TEST_THEN_INC32(&(hier->info[i].num_active));
1042 KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d my_unit (%p) "
1043 "incrementing num_active\n",
1044 gtid, my_unit));
1045 } else {
1046 KMP_TEST_THEN_INC32(&(my_unit->active));
1047 break;
1048 }
1049 }
1050 // Set this thread's id
1051  num_threads_per_layer1 = __kmp_dispatch_get_t1_per_t2(
1052      kmp_hier_layer_e::LAYER_THREAD, hier->get_type(0));
1053  pr->hier_id = tid % num_threads_per_layer1;
1054 // For oversubscribed threads, increment their index within the lowest unit
1055 // This is done to prevent having two or more threads with id 0, id 1, etc.
1056 if (tid >= num_hw_threads)
1057 pr->hier_id += ((tid / num_hw_threads) * num_threads_per_layer1);
1058 KD_TRACE(
1059 10, ("__kmp_dispatch_init_hierarchy: T#%d setting lowest hier_id to %d\n",
1060 gtid, pr->hier_id));
1061
1063 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1064
1065 // Now that the number of active threads at each level is determined,
1066 // the barrier data for each unit can be initialized and the last layer's
1067 // loop information can be initialized.
1068 int prev_id = pr->get_hier_id();
1069 for (int i = 0; i < n; ++i) {
1070 if (prev_id != 0)
1071 break;
1072 int index = __kmp_dispatch_get_index(tid, hier->get_type(i));
1073 kmp_hier_top_unit_t<T> *my_unit = hier->get_unit(i, index);
1074 // Only primary threads of this unit within the hierarchy do initialization
1075 KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d (%d) prev_id is 0\n",
1076 gtid, i));
1077 my_unit->reset_shared_barrier();
1078 my_unit->hier_pr.flags.contains_last = FALSE;
1079 // Last layer, initialize the private buffers with entire loop information
1080 // Now the next next_algorithm() call will get the first chunk of
1081 // iterations properly
1082 if (i == n - 1) {
1083 __kmp_dispatch_init_algorithm<T>(
1084 loc, gtid, my_unit->get_my_pr(), hier->get_sched(i), lb, ub, st,
1085#if USE_ITT_BUILD
1086 NULL,
1087#endif
1088 hier->get_chunk(i), hier->get_num_active(i), my_unit->get_hier_id());
1089 }
1090 prev_id = my_unit->get_hier_id();
1091 }
1092 // Initialize each layer of the thread's private barrier data
1093 kmp_hier_top_unit_t<T> *unit = pr->hier_parent;
1094 for (int i = 0; i < n && unit; ++i, unit = unit->get_parent()) {
1095 kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[i]);
1096 unit->reset_private_barrier(tdata);
1097 }
1098 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1099
1100#ifdef KMP_DEBUG
1101 if (__kmp_tid_from_gtid(gtid) == 0) {
1102 for (int i = 0; i < n; ++i) {
1103 KD_TRACE(10,
1104 ("__kmp_dispatch_init_hierarchy: T#%d active count[%d] = %d\n",
1105 gtid, i, hier->get_num_active(i)));
1106 }
1107 hier->print();
1108 }
1109 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1110#endif // KMP_DEBUG
1111}
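// Illustrative call (annotation with hypothetical values; the real caller lives
// elsewhere in the runtime): set up a two-level hierarchy that schedules
// dynamically within each L1 unit and statically across NUMA domains for an
// N-iteration loop over kmp_int32:
//   kmp_hier_layer_e layers[2] = {kmp_hier_layer_e::LAYER_L1,
//                                 kmp_hier_layer_e::LAYER_NUMA};
//   enum sched_type scheds[2] = {kmp_sch_dynamic_chunked, kmp_sch_static};
//   kmp_int32 chunks[2] = {4, 0};
//   __kmp_dispatch_init_hierarchy<kmp_int32>(loc, 2, layers, scheds, chunks,
//                                            0, N - 1, 1);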
1112#endif