#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
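/* __kmp_get_global_thread_id() below resolves the calling thread's gtid
   without registering it.  Roughly: __kmp_gtid_mode >= 3 reads it from
   thread-local data, mode >= 2 reads it from keyed TLS, and otherwise the
   routine falls back to an internal algorithm that walks the registered
   threads and matches the current stack address against each thread's
   recorded stack range. */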
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* The corresponding thread may already have been destructed. */
  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
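/* __kmp_check_stack_overlap() is a debugging aid: it optionally prints the
   caller's stack extent to the storage map and, when extensive checks are
   enabled for non-uber threads, compares that extent against every other
   registered thread's stack; any intersection is reported as a fatal
   StackOverlap error with a hint to change the stack limit. */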
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking uber threads since they use refinement and cannot
     overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
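/* __kmp_print_storage_map_gtid() prints one "OMP storage map" line describing
   the address range [p1, p2) of the given size, holding the stdio bootstrap
   lock.  When KMP_PRINT_DATA_PLACEMENT is enabled it additionally reports the
   host memory node(s) backing the pages of that range. */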
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = (char *)p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              p1 = (char *)p1 + page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill
  // them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

#if KMP_OS_WINDOWS
  // Let other threads know of abnormal termination and prevent deadlock
  // if abort happened during library initialization or shutdown
  __kmp_global.g.g_abort = SIGABRT;
  raise(SIGABRT);
  _exit(3); // Just in case, if signal ignored, exit anyway.
#else
  __kmp_unregister_library();
  abort();
#endif

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process
void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    // lpReserved == NULL means the DLL is being unloaded via FreeLibrary();
    // only then is it safe to shut the library down here.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
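/* __kmp_parallel_deo() / __kmp_parallel_dxo() implement entry to and exit from
   an ordered region when BUILD_PARALLEL_ORDERED is enabled: each thread waits
   until the team's ordered token t_ordered.dt.t_value equals its own tid, and
   on exit passes the token to (tid + 1) % nproc.  The consistency-check hooks
   push/pop the ordered construct for error reporting. */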
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
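/* __kmp_enter_single() decides which thread of the team executes a single
   construct: each arriving thread bumps its private this_construct counter and
   attempts an atomic compare-and-store on the team's t_construct; the thread
   whose CAS succeeds (or any thread of a serialized team) wins and returns a
   nonzero status.  __kmp_exit_single() closes the matching bookkeeping. */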
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* the thread whose CAS on t_construct succeeds owns the single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
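/* __kmp_reserve_threads() computes how many threads can actually be granted
   to a new parallel region.  The request is reduced, in order, by the active
   dynamic-adjustment mode (load balance, thread limit, or random), by
   KMP_DEVICE_THREAD_LIMIT / OMP_THREAD_LIMIT, and finally by the capacity of
   the __kmp_threads array (expanding it when possible).  It returns 1 when
   the region must be serialized. */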
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_ALL_THREADS / KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT.
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If this is not for initializing the hidden helper team, take the hidden
  // helper threads out of the capacity, because they are included in
  // __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
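/* __kmp_fork_team_threads() binds the primary thread to slot 0 of the new
   team and then, unless it is reusing a hot team, allocates or wakes worker
   threads for slots 1..nproc-1, propagating teams construct state and barrier
   arrival counters to each worker.  Affinity places are partitioned here
   unless the caller is forking the workers of a teams construct. */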
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's set up the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if max level is 0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
                 // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
                 // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for the given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers that have
    // not yet been forked to do real work; that happens in the parallel
    // region nested within the teams construct.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
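/* On x86/x86_64, propagateFPControl() captures the primary thread's x87
   control word and MXCSR into the team when __kmp_inherit_fp_control is set,
   and updateHWFPControl() restores those saved values on workers whose
   current FP state differs.  On other architectures both are no-ops. */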
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Propagate any changes to the floating point control registers out to the
   team, avoiding unnecessary writes to the team's cache line. */
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get the primary thread's FPU control flags (both x87 and vector).
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // Only write when the values differ, to keep the cache line clean.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code wants to know whether it should restore these values.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly, don't write to this cache line unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

/* Do the opposite: set the hardware registers to the saved team values. */
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
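/* __kmp_serialized_parallel() runs a parallel region on a team of one.  The
   calling thread keeps executing, but it is re-linked onto its serial team
   (allocating or nesting it as needed), ICVs are pushed for the nested level,
   a per-level dispatch buffer is set up, and the matching OMPT parallel /
   implicit-task begin events are raised. */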
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  /* Skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  this_thr->th.th_set_nproc = 0;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; grab a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup the new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* copy the ICVs of the parent implicit task to the implicit task of the
       serialized region */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();

  } else {
    /* this serialized team is already being used; just add another nested
       level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push a dispatch buffer onto the stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // do not use lw_taskteam after linking; its content was swapped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
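/* The two predicates below classify a fork with respect to the teams
   construct: __kmp_is_fork_in_teams() is true for a parallel region forked by
   a thread that is already inside a teams construct at the teams level, while
   __kmp_is_entering_teams() is true when the fork is the one that creates the
   league of teams itself. */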
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}
// Check if this fork is for a team closely nested in a teams construct
static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
static inline int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data, void *return_address,
#endif
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // AC: we are in serialized parallel
    __kmpc_serialized_parallel(loc, gtid);
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // AC: need to decrement t_serialized for enquiry functions to work
      // correctly; will restore at join time.
      parent_team->t.t_serialized--;
      return TRUE;
    }

#if OMPD_SUPPORT
    parent_team->t.t_pkfn = microtask;
#endif

#if OMPT_SUPPORT
    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // do not use lw_taskteam after linking; its content was swapped

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }
#endif

    // AC: need to decrement t_serialized for enquiry functions to work
    // correctly; will restore at join time.
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                             ,
                             exit_frame_p
#endif
      );
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  // th_teams_size.nth is specific to this team nested in a teams construct;
  // the team is fully created and we are about to do the actual fork, so the
  // subsequent uses below and in the join see the correct value.
  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }
#endif

  /* Change the number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has a num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // AC: can only reduce the number of threads dynamically, not increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using the distributed barrier, this block must run even when
      // the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

#if USE_DEBUGGER
  if (__kmp_debugging) { // Let the debugger override the number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means the debugger does not want to change it
      master_set_numthreads = nth;
    }
  }
#endif

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use the current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure out the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to the correct value for each implicit
  // task
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create a new stack stitching id before entering the fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke the microtask for the PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
static inline int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
#if OMPT_SUPPORT
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
#endif
                       kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  void *args[argc];
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

#if OMPD_SUPPORT
  master_th->th.th_serial_team->t.t_pkfn = microtask;
#endif

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // revert the change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // do not use lw_taskteam after linking; its content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      // team->t.t_pkfn = microtask;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      if (ap) {
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
      } else {
        for (i = 0; i < argc; ++i)
          // Get args from the parent team for the teams construct
          argv[i] = parent_team->t.t_argv[i];
      }
      // AC: revert the change made in __kmpc_serialized_parallel() because
      //     initial code in teams should have level = 0
      team->t.t_level--;
      // AC: call the special invoker for the outer "parallel" of teams
      invoker(gtid);
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      KMP_MB();

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // do not use lw_taskteam after linking; its content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    }
  } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
// do not use lw_taskteam after linking; its content was swapped
#endif

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB();
  return FALSE;
}
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int task_thread_limit = 0;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with
       some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These two lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;
  task_thread_limit =
      master_th->th.th_current_task->td_icvs.task_thread_limit;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  // Assign affinity to the root thread if it has not happened yet
  __kmp_assign_root_init_mask();

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif
  master_th->th.th_ident = loc;

  // Parallel region closely nested in a teams construct
  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
#if OMPT_SUPPORT
                               ompt_parallel_data, return_address,
#endif
                               ap);
  } // End parallel closely nested in teams construct

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  /* Determine the number of threads */
  int enter_teams =
      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   // TODO: get nproc directly from current task
                   : get__nproc_2(parent_team, master_tid);
    // Respect the thread_limit of the current target task, if set
    nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
                   ? task_thread_limit
                   : nthreads;
    // No need to take the forkjoin lock for a serialized parallel
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      /* AC: If we execute teams from a parallel region (on host), then teams
         should be created but each can only have 1 thread if nesting is
         disabled. If teams are called from a serial region, then teams and
         their threads should be created regardless of the nesting setting. */
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free the lock for single-thread execution here; for multi-thread
        // execution it will be freed later, after the team of threads has
        // been created and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads, restore it now
  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
#if OMPT_SUPPORT
                                  &ompt_parallel_data, &return_address,
                                  &parent_task_data,
#endif
                                  ap);
  } // if (nthreads == 1)
  // GEH: only modify the executing flag in the case when not serialized;
  //      the serialized case is handled in kmpc_serialized_parallel
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use the current proc-bind-var for this
    // parallel region.
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Have the teams construct take its proc_bind from KMP_TEAMS_PROC_BIND
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    }
    // Figure out the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      // Do not modify the proc-bind ICV for the two teams construct forks;
      // they just let the proc-bind ICV pass through.
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase the parallel level at the start of the teams
    // construct.
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set the primary thread's schedule as the new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();
#endif

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set the primary thread's task team to the team's task team. Unless this
    // is the hot team, it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of the primary thread's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init the rest of the stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store the primary thread's task_state on the stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore the primary thread's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();
  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from the parent team for the teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
#if USE_ITT_NOTIFY
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report the frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else
// only one notification scheme (either "submit" or "forking/joined", not both)
#endif /* USE_ITT_NOTIFY */
      if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
          __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
        // Mark the start of the "parallel" region for Intel(R) VTune(TM).
        __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
      }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // create a new stack stitching id before entering the fork barrier
    if (!enter_teams) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      // keep the stack stitching id in the serialized parent_team; the
      // current team will be used for the parallel inside the teams.
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
  }
#endif /* USE_ITT_BUILD */

  // AC: skip __kmp_internal_fork at teams construct; let only primary
  // threads execute
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }
  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  KMP_SET_THREAD_STATE(previous_state);
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  return TRUE;
}
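// OMPT support: the two helpers below are used on the join path. The first
// restores the thread's OMPT state once the region has ended; the second
// additionally emits the parallel-end callback when a tool has registered one.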
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore the state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
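// __kmp_join_call: executed by the primary thread at the end of a parallel
// region. It joins the team at the fork/join barrier, reports ITT/OMPT events,
// restores the primary thread's ICVs, dispatch buffers and task state, and
// returns the team to the hot-team / thread pool machinery.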
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  // setup current data
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

  void *team_microtask = (void *)team->t.t_pkfn;
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in a teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // increment the level at the end of the teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // exiting parallel inside teams: bump serialization so that the next
        // __kmpc_end_serialized_parallel can restore it
        team->t.t_serialized++;
      }
    }

    if (ompt_enabled.enabled) {
      if (fork_context == fork_context_gnu) {
        __ompt_lw_taskteam_unlink(master_th);
      }
      __kmp_join_restore_state(master_th, parent_team);
    }
    return;
  }
  master_active = team->t.t_master_active;

  __kmp_internal_join(loc, gtid, team);

  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
    // destroy the stack stitching id after the join barrier
    __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
    team->t.t_stack_id = NULL;
  }
2510 master_th->th.th_task_state =
  if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
    // destroy the stack stitching id held by the serialized parent team
    __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
    parent_team->t.t_stack_id = NULL;
  }

  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;

  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || // not in teams construct
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme is used (either "submit" or "forking/joined")
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  }
#if KMP_AFFINITY_SUPPORTED
  // Restore the primary thread's place partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif // KMP_AFFINITY_SUPPORTED

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // Exiting a parallel region nested directly inside a teams construct:
    // keep the (hot) team for the next parallel, only adjust the nesting
    // bookkeeping and the OMPT/task state below.
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore the number of threads in the team if it was reduced by the
    // inner parallel region.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust the states of the threads that were not used last time
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize the thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize the thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }

    return;
  }
  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

  master_th->th.th_def_allocator = team->t.t_def_allocator;

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(master_th)); // free the team

  /* restore the parent team */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if needed */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top > 0) {
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember primary thread's state if we re-use this same team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore the state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    } else if (team != root->r.r_hot_team) {
      // Reset the task state of the primary thread; the workers of a non-hot
      // team are released and reset their own state.
      master_th->th.th_task_state = 0;
    }
    // Restore the primary thread's task team from the parent team
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // Restore the executing flag of the primary thread's implicit task
  master_th->th.th_current_task->td_flags.executing = 1;
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if KMP_AFFINITY_SUPPORTED
  if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif

  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
               thread->th.th_team->t.t_serialized) {
      push = 1;
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
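// __kmp_set_num_threads: back end of omp_set_num_threads(). Besides updating
// the calling thread's nproc ICV it may shrink the root's hot team right away
// so that no-longer-needed workers are returned to the thread pool.
// Illustrative call only (this is an internal entry point):
//   __kmp_set_num_threads(4, __kmp_entry_gtid()); // nproc ICV becomes 4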
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this call reduces the hot team size (and no num_threads clause will
  // override it), shrink the hot team now instead of at the next fork.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // Threads leaving the team should unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
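// __kmp_set_max_active_levels / __kmp_get_max_active_levels: back ends of the
// corresponding OpenMP API routines; the value lives in the per-task ICVs.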
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10,
           ("__kmp_set_max_active_levels: new max_active_levels for thread "
            "%d = (%d)\n",
            gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // Negative values are ignored; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
void __kmp_set_num_teams(int num_teams) {
  __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
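// __kmp_set_schedule / __kmp_get_schedule: back ends of omp_set_schedule() and
// omp_get_schedule(). Public kmp_sched_t kinds are translated to the internal
// sched_type enumeration through __kmp_sch_map; the monotonic/nonmonotonic
// modifiers are stripped for validation and re-applied afterwards.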
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  // Check if the kind parameter is valid, correct if needed.
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // chunk was not specified: use the unchunked static schedule
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    // extended (non-standard) schedule kinds
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set; show this fact via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
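// __kmp_get_ancestor_thread_num / __kmp_get_team_size: back ends of
// omp_get_ancestor_thread_num() and omp_get_team_size(). Both walk from the
// current team up through t_parent, counting serialized levels, until the
// requested nesting level is reached.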
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;

  if (thr->th.th_teams_microtask) {
    // in a teams region all nested teams report the teams level
    int tlevel = thr->th.th_teams_level;
    KMP_DEBUG_ASSERT(ii >= tlevel);
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;

  if (thr->th.th_teams_microtask) {
    // in a teams region all nested teams report the teams level
    int tlevel = thr->th.th_teams_level;
    KMP_DEBUG_ASSERT(ii >= tlevel);
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  // Build the run-time schedule from __kmp_sched/__kmp_chunk and the detailed
  // __kmp_static/__kmp_guided kinds.
  kmp_r_sched_t r_sched;
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided;
  } else {
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }
  return r_sched;
}
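// __kmp_alloc_argv_entries: make sure team->t.t_argv can hold argc entries.
// Small argument lists reuse the buffer inlined in the team structure
// (KMP_INLINE_ARGV_ENTRIES); larger ones switch to a page-allocated array of
// at least KMP_MIN_MALLOC_ARGV_ENTRIES pointers.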
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    // free the old argv array if it was dynamically allocated
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use the space inlined in the team structure for argv */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate memory for argv */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
3257static void __kmp_free_team_arrays(kmp_team_t *team) {
3260 for (i = 0; i < team->t.t_max_nproc; ++i) {
3261 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3262 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3263 team->t.t_dispatch[i].th_disp_buffer = NULL;
3266#if KMP_USE_HIER_SCHED
3267 __kmp_dispatch_free_hierarchies(team);
3269 __kmp_free(team->t.t_threads);
3270 __kmp_free(team->t.t_disp_buffer);
3271 __kmp_free(team->t.t_dispatch);
3272 __kmp_free(team->t.t_implicit_task_taskdata);
3273 team->t.t_threads = NULL;
3274 team->t.t_disp_buffer = NULL;
3275 team->t.t_dispatch = NULL;
3276 team->t.t_implicit_task_taskdata = 0;
3279static void __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
3280 kmp_info_t **oldThreads = team->t.t_threads;
3282 __kmp_free(team->t.t_disp_buffer);
3283 __kmp_free(team->t.t_dispatch);
3284 __kmp_free(team->t.t_implicit_task_taskdata);
3285 __kmp_allocate_team_arrays(team, max_nth);
3287 KMP_MEMCPY(team->t.t_threads, oldThreads,
3288 team->t.t_nproc *
sizeof(kmp_info_t *));
3290 __kmp_free(oldThreads);
3293static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3295 kmp_r_sched_t r_sched =
3296 __kmp_get_schedule_global();
3298 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3300 kmp_internal_control_t g_icvs = {
3302 (kmp_int8)__kmp_global.g.g_dynamic,
3304 (kmp_int8)__kmp_env_blocktime,
3306 __kmp_dflt_blocktime,
3311 __kmp_dflt_team_nth,
3317 __kmp_dflt_max_active_levels,
3321 __kmp_nested_proc_bind.bind_types[0],
3322 __kmp_default_device,
3329static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3331 kmp_internal_control_t gx_icvs;
3332 gx_icvs.serial_nesting_level =
3334 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3335 gx_icvs.next = NULL;
3340static void __kmp_initialize_root(kmp_root_t *root) {
3342 kmp_team_t *root_team;
3343 kmp_team_t *hot_team;
3344 int hot_team_max_nth;
3345 kmp_r_sched_t r_sched =
3346 __kmp_get_schedule_global();
3347 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3348 KMP_DEBUG_ASSERT(root);
3349 KMP_ASSERT(!root->r.r_begin);
3352 __kmp_init_lock(&root->r.r_begin_lock);
3353 root->r.r_begin = FALSE;
3354 root->r.r_active = FALSE;
3355 root->r.r_in_parallel = 0;
3356 root->r.r_blocktime = __kmp_dflt_blocktime;
3357#if KMP_AFFINITY_SUPPORTED
3358 root->r.r_affinity_assigned = FALSE;
3363 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3366 __kmp_allocate_team(root,
3372 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3374 USE_NESTED_HOT_ARG(NULL)
3379 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3382 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3384 root->r.r_root_team = root_team;
3385 root_team->t.t_control_stack_top = NULL;
3388 root_team->t.t_threads[0] = NULL;
3389 root_team->t.t_nproc = 1;
3390 root_team->t.t_serialized = 1;
3392 root_team->t.t_sched.sched = r_sched.sched;
3395 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3396 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3400 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3403 __kmp_allocate_team(root,
3405 __kmp_dflt_team_nth_ub * 2,
3409 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3411 USE_NESTED_HOT_ARG(NULL)
3413 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3415 root->r.r_hot_team = hot_team;
3416 root_team->t.t_control_stack_top = NULL;
3419 hot_team->t.t_parent = root_team;
3422 hot_team_max_nth = hot_team->t.t_max_nproc;
3423 for (f = 0; f < hot_team_max_nth; ++f) {
3424 hot_team->t.t_threads[f] = NULL;
3426 hot_team->t.t_nproc = 1;
3428 hot_team->t.t_sched.sched = r_sched.sched;
3429 hot_team->t.t_size_changed = 0;
3434typedef struct kmp_team_list_item {
3435 kmp_team_p
const *entry;
3436 struct kmp_team_list_item *next;
3437} kmp_team_list_item_t;
3438typedef kmp_team_list_item_t *kmp_team_list_t;
3440static void __kmp_print_structure_team_accum(
3441 kmp_team_list_t list,
3442 kmp_team_p
const *team
3452 KMP_DEBUG_ASSERT(list != NULL);
3457 __kmp_print_structure_team_accum(list, team->t.t_parent);
3458 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3462 while (l->next != NULL && l->entry != team) {
3465 if (l->next != NULL) {
3471 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3477 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3478 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3506void __kmp_print_structure(
void) {
3508 kmp_team_list_t list;
3512 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3516 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3517 "Table\n------------------------------\n");
3520 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3521 __kmp_printf(
"%2d", gtid);
3522 if (__kmp_threads != NULL) {
3523 __kmp_printf(
" %p", __kmp_threads[gtid]);
3525 if (__kmp_root != NULL) {
3526 __kmp_printf(
" %p", __kmp_root[gtid]);
3533 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3535 if (__kmp_threads != NULL) {
3537 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3538 kmp_info_t
const *thread = __kmp_threads[gtid];
3539 if (thread != NULL) {
3540 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3541 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3542 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3543 __kmp_print_structure_team(
" Serial Team: ",
3544 thread->th.th_serial_team);
3545 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3546 __kmp_print_structure_thread(
" Primary: ",
3547 thread->th.th_team_master);
3548 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3549 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3550 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3551 __kmp_print_structure_thread(
" Next in pool: ",
3552 thread->th.th_next_pool);
3554 __kmp_print_structure_team_accum(list, thread->th.th_team);
3555 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3559 __kmp_printf(
"Threads array is not allocated.\n");
3563 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3565 if (__kmp_root != NULL) {
3567 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3568 kmp_root_t
const *root = __kmp_root[gtid];
3570 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3571 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3572 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3573 __kmp_print_structure_thread(
" Uber Thread: ",
3574 root->r.r_uber_thread);
3575 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3576 __kmp_printf(
" In Parallel: %2d\n",
3577 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3579 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3580 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3584 __kmp_printf(
"Ubers array is not allocated.\n");
3587 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3589 while (list->next != NULL) {
3590 kmp_team_p
const *team = list->entry;
3592 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3593 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3594 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3595 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3596 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3597 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3598 for (i = 0; i < team->t.t_nproc; ++i) {
3599 __kmp_printf(
" Thread %2d: ", i);
3600 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3602 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3608 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3610 __kmp_print_structure_thread(
"Thread pool: ",
3611 CCAST(kmp_info_t *, __kmp_thread_pool));
3612 __kmp_print_structure_team(
"Team pool: ",
3613 CCAST(kmp_team_t *, __kmp_team_pool));
3617 while (list != NULL) {
3618 kmp_team_list_item_t *item = list;
3620 KMP_INTERNAL_FREE(item);
3629static const unsigned __kmp_primes[] = {
3630 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3631 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3632 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3633 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3634 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3635 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3636 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3637 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3638 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3639 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3640 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
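// Per-thread pseudo-random number generator, used where the runtime needs
// cheap thread-local randomness. Each thread runs a linear congruential
// generator
//   x_{n+1} = a * x_n + 1 (mod 2^32)
// with a thread-specific multiplier 'a' drawn from __kmp_primes; the value
// handed out is the upper 16 bits of the state. Sketch of one step, mirroring
// the code below:
//   unsigned x = thread->th.th_x;                 // current state
//   unsigned short r = (unsigned short)(x >> 16); // returned value
//   thread->th.th_x = x * thread->th.th_a + 1;    // advance the state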
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // skip roots that are still active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
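// __kmp_expand_threads: grow __kmp_threads[] and __kmp_root[] so that at least
// nNeed additional slots are available. The capacity is doubled until the
// request is covered (capped at __kmp_sys_max_nth); the old arrays are kept on
// __kmp_old_threads_list because other threads may still be reading them.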
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock.
#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library */
  added = __kmp_reclaim_dead_roots();
#endif

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put the old __kmp_threads array on a list; ongoing references to it stay
  // valid, and the list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to match the new capacity
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
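// __kmp_register_root: register the calling native thread as an OpenMP root.
// It claims a slot in __kmp_threads[], allocates the kmp_root_t / kmp_info_t
// structures and the root's serial team, initializes barrier and affinity
// bookkeeping, and fires the OMPT thread-begin / initial-task callbacks when a
// tool is attached.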
3797int __kmp_register_root(
int initial_thread) {
3798 kmp_info_t *root_thread;
3802 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3803 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3820 capacity = __kmp_threads_capacity;
3821 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3828 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3829 capacity -= __kmp_hidden_helper_threads_num;
3833 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3834 if (__kmp_tp_cached) {
3835 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3836 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3837 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3839 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3849 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3852 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3853 gtid <= __kmp_hidden_helper_threads_num;
3856 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3857 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3858 "hidden helper thread: T#%d\n",
3864 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3867 for (gtid = __kmp_hidden_helper_threads_num + 1;
3868 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3872 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3873 KMP_ASSERT(gtid < __kmp_threads_capacity);
3878 TCW_4(__kmp_nth, __kmp_nth + 1);
3882 if (__kmp_adjust_gtid_mode) {
3883 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3884 if (TCR_4(__kmp_gtid_mode) != 2) {
3885 TCW_4(__kmp_gtid_mode, 2);
3888 if (TCR_4(__kmp_gtid_mode) != 1) {
3889 TCW_4(__kmp_gtid_mode, 1);
3894#ifdef KMP_ADJUST_BLOCKTIME
3897 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3898 if (__kmp_nth > __kmp_avail_proc) {
3899 __kmp_zero_bt = TRUE;
3905 if (!(root = __kmp_root[gtid])) {
3906 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3907 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3910#if KMP_STATS_ENABLED
3912 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3913 __kmp_stats_thread_ptr->startLife();
3914 KMP_SET_THREAD_STATE(SERIAL_REGION);
3917 __kmp_initialize_root(root);
3920 if (root->r.r_uber_thread) {
3921 root_thread = root->r.r_uber_thread;
3923 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3924 if (__kmp_storage_map) {
3925 __kmp_print_thread_storage_map(root_thread, gtid);
3927 root_thread->th.th_info.ds.ds_gtid = gtid;
3929 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3931 root_thread->th.th_root = root;
3932 if (__kmp_env_consistency_check) {
3933 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3936 __kmp_initialize_fast_memory(root_thread);
3940 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3941 __kmp_initialize_bget(root_thread);
3943 __kmp_init_random(root_thread);
3947 if (!root_thread->th.th_serial_team) {
3948 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3949 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3950 root_thread->th.th_serial_team = __kmp_allocate_team(
3955 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3957 KMP_ASSERT(root_thread->th.th_serial_team);
3958 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3959 root_thread->th.th_serial_team));
3962 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3964 root->r.r_root_team->t.t_threads[0] = root_thread;
3965 root->r.r_hot_team->t.t_threads[0] = root_thread;
3966 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3968 root_thread->th.th_serial_team->t.t_serialized = 0;
3969 root->r.r_uber_thread = root_thread;
3972 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3973 TCW_4(__kmp_init_gtid, TRUE);
3976 __kmp_gtid_set_specific(gtid);
3979 __kmp_itt_thread_name(gtid);
3982#ifdef KMP_TDATA_GTID
3985 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3986 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3988 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3990 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3991 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3992 KMP_INIT_BARRIER_STATE));
3995 for (b = 0; b < bs_last_barrier; ++b) {
3996 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3998 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
4002 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4003 KMP_INIT_BARRIER_STATE);
4005#if KMP_AFFINITY_SUPPORTED
4006 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4007 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4008 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4009 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4011 root_thread->th.th_def_allocator = __kmp_def_allocator;
4012 root_thread->th.th_prev_level = 0;
4013 root_thread->th.th_prev_num_threads = 1;
4015 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
4016 tmp->cg_root = root_thread;
4017 tmp->cg_thread_limit = __kmp_cg_max_nth;
4018 tmp->cg_nthreads = 1;
4019 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
4020 " cg_nthreads init to 1\n",
4023 root_thread->th.th_cg_roots = tmp;
4025 __kmp_root_counter++;
4028 if (!initial_thread && ompt_enabled.enabled) {
4030 kmp_info_t *root_thread = ompt_get_thread();
4032 ompt_set_thread_state(root_thread, ompt_state_overhead);
4034 if (ompt_enabled.ompt_callback_thread_begin) {
4035 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4036 ompt_thread_initial, __ompt_get_thread_data_internal());
4038 ompt_data_t *task_data;
4039 ompt_data_t *parallel_data;
4040 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4042 if (ompt_enabled.ompt_callback_implicit_task) {
4043 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4044 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4047 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4051 if (ompd_state & OMPD_ENABLE_BP)
4052 ompd_bp_thread_begin();
4056 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4061#if KMP_NESTED_HOT_TEAMS
4062static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
4063 const int max_level) {
4065 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4066 if (!hot_teams || !hot_teams[level].hot_team) {
4069 KMP_DEBUG_ASSERT(level < max_level);
4070 kmp_team_t *team = hot_teams[level].hot_team;
4071 nth = hot_teams[level].hot_team_nth;
4073 if (level < max_level - 1) {
4074 for (i = 0; i < nth; ++i) {
4075 kmp_info_t *th = team->t.t_threads[i];
4076 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4077 if (i > 0 && th->th.th_hot_teams) {
4078 __kmp_free(th->th.th_hot_teams);
4079 th->th.th_hot_teams = NULL;
4083 __kmp_free_team(root, team, NULL);
4090static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
4091 kmp_team_t *root_team = root->r.r_root_team;
4092 kmp_team_t *hot_team = root->r.r_hot_team;
4093 int n = hot_team->t.t_nproc;
4096 KMP_DEBUG_ASSERT(!root->r.r_active);
4098 root->r.r_root_team = NULL;
4099 root->r.r_hot_team = NULL;
4102 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4103#if KMP_NESTED_HOT_TEAMS
4104 if (__kmp_hot_teams_max_level >
4106 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4107 kmp_info_t *th = hot_team->t.t_threads[i];
4108 if (__kmp_hot_teams_max_level > 1) {
4109 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4111 if (th->th.th_hot_teams) {
4112 __kmp_free(th->th.th_hot_teams);
4113 th->th.th_hot_teams = NULL;
4118 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4123 if (__kmp_tasking_mode != tskm_immediate_exec) {
4124 __kmp_wait_to_unref_task_teams();
4130 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4132 (LPVOID) & (root->r.r_uber_thread->th),
4133 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4134 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4138 if (ompd_state & OMPD_ENABLE_BP)
4139 ompd_bp_thread_end();
4143 ompt_data_t *task_data;
4144 ompt_data_t *parallel_data;
4145 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4147 if (ompt_enabled.ompt_callback_implicit_task) {
4148 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4149 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4151 if (ompt_enabled.ompt_callback_thread_end) {
4152 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4153 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4159 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4160 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4162 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4163 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4166 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4167 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4168 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4169 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4170 root->r.r_uber_thread->th.th_cg_roots = NULL;
4172 __kmp_reap_thread(root->r.r_uber_thread, 1);
4176 root->r.r_uber_thread = NULL;
4178 root->r.r_begin = FALSE;
4183void __kmp_unregister_root_current_thread(
int gtid) {
4184 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4188 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4189 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4190 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4193 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4196 kmp_root_t *root = __kmp_root[gtid];
4198 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4199 KMP_ASSERT(KMP_UBER_GTID(gtid));
4200 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4201 KMP_ASSERT(root->r.r_active == FALSE);
4205 kmp_info_t *thread = __kmp_threads[gtid];
4206 kmp_team_t *team = thread->th.th_team;
4207 kmp_task_team_t *task_team = thread->th.th_task_team;
4210 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4211 task_team->tt.tt_hidden_helper_task_encountered)) {
4214 thread->th.ompt_thread_info.state = ompt_state_undefined;
4216 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4219 __kmp_reset_root(gtid, root);
4223 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4225 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4232static int __kmp_unregister_root_other_thread(
int gtid) {
4233 kmp_root_t *root = __kmp_root[gtid];
4236 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4237 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4238 KMP_ASSERT(KMP_UBER_GTID(gtid));
4239 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4240 KMP_ASSERT(root->r.r_active == FALSE);
4242 r = __kmp_reset_root(gtid, root);
4244 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4250void __kmp_task_info() {
4252 kmp_int32 gtid = __kmp_entry_gtid();
4253 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4254 kmp_info_t *this_thr = __kmp_threads[gtid];
4255 kmp_team_t *steam = this_thr->th.th_serial_team;
4256 kmp_team_t *team = this_thr->th.th_team;
4259 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4261 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4262 team->t.t_implicit_task_taskdata[tid].td_parent);
4269static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4270 int tid,
int gtid) {
4274 KMP_DEBUG_ASSERT(this_thr != NULL);
4275 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4276 KMP_DEBUG_ASSERT(team);
4277 KMP_DEBUG_ASSERT(team->t.t_threads);
4278 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4279 kmp_info_t *master = team->t.t_threads[0];
4280 KMP_DEBUG_ASSERT(master);
4281 KMP_DEBUG_ASSERT(master->th.th_root);
4285 TCW_SYNC_PTR(this_thr->th.th_team, team);
4287 this_thr->th.th_info.ds.ds_tid = tid;
4288 this_thr->th.th_set_nproc = 0;
4289 if (__kmp_tasking_mode != tskm_immediate_exec)
4292 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4294 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4295 this_thr->th.th_set_proc_bind = proc_bind_default;
4296#if KMP_AFFINITY_SUPPORTED
4297 this_thr->th.th_new_place = this_thr->th.th_current_place;
4299 this_thr->th.th_root = master->th.th_root;
4302 this_thr->th.th_team_nproc = team->t.t_nproc;
4303 this_thr->th.th_team_master = master;
4304 this_thr->th.th_team_serialized = team->t.t_serialized;
4306 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4308 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4309 tid, gtid, this_thr, this_thr->th.th_current_task));
4311 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4314 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4315 tid, gtid, this_thr, this_thr->th.th_current_task));
4320 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4322 this_thr->th.th_local.this_construct = 0;
4324 if (!this_thr->th.th_pri_common) {
4325 this_thr->th.th_pri_common =
4326 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4327 if (__kmp_storage_map) {
4328 __kmp_print_storage_map_gtid(
4329 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4330 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4332 this_thr->th.th_pri_head = NULL;
4335 if (this_thr != master &&
4336 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4338 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4339 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4342 int i = tmp->cg_nthreads--;
4343 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4344 " on node %p of thread %p to %d\n",
4345 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4350 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4352 this_thr->th.th_cg_roots->cg_nthreads++;
4353 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4354 " node %p of thread %p to %d\n",
4355 this_thr, this_thr->th.th_cg_roots,
4356 this_thr->th.th_cg_roots->cg_root,
4357 this_thr->th.th_cg_roots->cg_nthreads));
4358 this_thr->th.th_current_task->td_icvs.thread_limit =
4359 this_thr->th.th_cg_roots->cg_thread_limit;
4364 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4367 sizeof(dispatch_private_info_t) *
4368 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4369 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4370 team->t.t_max_nproc));
4371 KMP_ASSERT(dispatch);
4372 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4373 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4375 dispatch->th_disp_index = 0;
4376 dispatch->th_doacross_buf_idx = 0;
4377 if (!dispatch->th_disp_buffer) {
4378 dispatch->th_disp_buffer =
4379 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4381 if (__kmp_storage_map) {
4382 __kmp_print_storage_map_gtid(
4383 gtid, &dispatch->th_disp_buffer[0],
4384 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4386 : __kmp_dispatch_num_buffers],
4388 "th_%d.th_dispatch.th_disp_buffer "
4389 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4390 gtid, team->t.t_id, gtid);
4393 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4396 dispatch->th_dispatch_pr_current = 0;
4397 dispatch->th_dispatch_sh_current = 0;
4399 dispatch->th_deo_fcn = 0;
4400 dispatch->th_dxo_fcn = 0;
4403 this_thr->th.th_next_pool = NULL;
4405 if (!this_thr->th.th_task_state_memo_stack) {
4407 this_thr->th.th_task_state_memo_stack =
4408 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4409 this_thr->th.th_task_state_top = 0;
4410 this_thr->th.th_task_state_stack_sz = 4;
4411 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4413 this_thr->th.th_task_state_memo_stack[i] = 0;
4416 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4417 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
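// __kmp_allocate_thread: hand out a worker for 'team'. A thread from
// __kmp_thread_pool is reused when one is available; otherwise a fresh
// kmp_info_t is allocated, a global thread id is reserved, its serial team and
// barrier data are initialized, and an OS thread is started via
// __kmp_create_worker.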
4427kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4429 kmp_team_t *serial_team;
4430 kmp_info_t *new_thr;
4433 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4434 KMP_DEBUG_ASSERT(root && team);
4435#if !KMP_NESTED_HOT_TEAMS
4436 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
  /* first, try to get one from the thread pool */
  if (__kmp_thread_pool) {
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
4448 __kmp_suspend_initialize_thread(new_thr);
4449 __kmp_lock_suspend_mx(new_thr);
4450 if (new_thr->th.th_active_in_pool == TRUE) {
4451 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4452 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4453 new_thr->th.th_active_in_pool = FALSE;
4455 __kmp_unlock_suspend_mx(new_thr);
4457 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4458 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4459 KMP_ASSERT(!new_thr->th.th_team);
4460 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4463 __kmp_initialize_info(new_thr, team, new_tid,
4464 new_thr->th.th_info.ds.ds_gtid);
4465 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4467 TCW_4(__kmp_nth, __kmp_nth + 1);
4469 new_thr->th.th_task_state = 0;
4470 new_thr->th.th_task_state_top = 0;
4471 new_thr->th.th_task_state_stack_sz = 4;
4473 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4475 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4479#ifdef KMP_ADJUST_BLOCKTIME
4482 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4483 if (__kmp_nth > __kmp_avail_proc) {
4484 __kmp_zero_bt = TRUE;
4493 kmp_balign_t *balign = new_thr->th.th_bar;
4494 for (b = 0; b < bs_last_barrier; ++b)
4495 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4498 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4499 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4506 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4507 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4512 if (!TCR_4(__kmp_init_monitor)) {
4513 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4514 if (!TCR_4(__kmp_init_monitor)) {
4515 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4516 TCW_4(__kmp_init_monitor, 1);
4517 __kmp_create_monitor(&__kmp_monitor);
4518 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4529 while (TCR_4(__kmp_init_monitor) < 2) {
4532 KF_TRACE(10, (
"after monitor thread has started\n"));
4535 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4542 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4544 : __kmp_hidden_helper_threads_num + 1;
4546 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4548 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4551 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4552 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4557 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4559 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4561#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4564 __itt_suppress_mark_range(
4565 __itt_suppress_range, __itt_suppress_threading_errors,
4566 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4567 __itt_suppress_mark_range(
4568 __itt_suppress_range, __itt_suppress_threading_errors,
4569 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4571 __itt_suppress_mark_range(
4572 __itt_suppress_range, __itt_suppress_threading_errors,
4573 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4575 __itt_suppress_mark_range(__itt_suppress_range,
4576 __itt_suppress_threading_errors,
4577 &new_thr->th.th_suspend_init_count,
4578 sizeof(new_thr->th.th_suspend_init_count));
4581 __itt_suppress_mark_range(__itt_suppress_range,
4582 __itt_suppress_threading_errors,
4583 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4584 sizeof(new_thr->th.th_bar[0].bb.b_go));
4585 __itt_suppress_mark_range(__itt_suppress_range,
4586 __itt_suppress_threading_errors,
4587 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4588 sizeof(new_thr->th.th_bar[1].bb.b_go));
4589 __itt_suppress_mark_range(__itt_suppress_range,
4590 __itt_suppress_threading_errors,
4591 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4592 sizeof(new_thr->th.th_bar[2].bb.b_go));
4594 if (__kmp_storage_map) {
4595 __kmp_print_thread_storage_map(new_thr, new_gtid);
4600 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4601 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4602 new_thr->th.th_serial_team = serial_team =
4603 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4607 proc_bind_default, &r_icvs,
4608 0 USE_NESTED_HOT_ARG(NULL));
4610 KMP_ASSERT(serial_team);
4611 serial_team->t.t_serialized = 0;
4613 serial_team->t.t_threads[0] = new_thr;
4615 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4619 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4622 __kmp_initialize_fast_memory(new_thr);
4626 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4627 __kmp_initialize_bget(new_thr);
4630 __kmp_init_random(new_thr);
4634 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4635 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4638 kmp_balign_t *balign = new_thr->th.th_bar;
4639 for (b = 0; b < bs_last_barrier; ++b) {
4640 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4641 balign[b].bb.team = NULL;
4642 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4643 balign[b].bb.use_oncore_barrier = 0;
4646 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4647 new_thr->th.th_sleep_loc_type = flag_unset;
4649 new_thr->th.th_spin_here = FALSE;
4650 new_thr->th.th_next_waiting = 0;
4652 new_thr->th.th_blocking =
false;
4655#if KMP_AFFINITY_SUPPORTED
4656 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4657 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4658 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4659 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4661 new_thr->th.th_def_allocator = __kmp_def_allocator;
4662 new_thr->th.th_prev_level = 0;
4663 new_thr->th.th_prev_num_threads = 1;
4665 TCW_4(new_thr->th.th_in_pool, FALSE);
4666 new_thr->th.th_active_in_pool = FALSE;
4667 TCW_4(new_thr->th.th_active, TRUE);
4675 if (__kmp_adjust_gtid_mode) {
4676 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4677 if (TCR_4(__kmp_gtid_mode) != 2) {
4678 TCW_4(__kmp_gtid_mode, 2);
4681 if (TCR_4(__kmp_gtid_mode) != 1) {
4682 TCW_4(__kmp_gtid_mode, 1);
4687#ifdef KMP_ADJUST_BLOCKTIME
4690 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4691 if (__kmp_nth > __kmp_avail_proc) {
4692 __kmp_zero_bt = TRUE;
4697#if KMP_AFFINITY_SUPPORTED
4699 __kmp_affinity_set_init_mask(new_gtid, FALSE);
4704 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4705 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4707 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4709 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4720static void __kmp_reinitialize_team(kmp_team_t *team,
4721 kmp_internal_control_t *new_icvs,
4723 KF_TRACE(10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4724 team->t.t_threads[0], team));
4725 KMP_DEBUG_ASSERT(team && new_icvs);
4726 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4727 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4729 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4731 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4732 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4734 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4735 team->t.t_threads[0], team));
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  team->t.t_master_tid = 0;
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL);
  team->t.t_invoke = NULL;

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE;
  team->t.t_x87_fpu_control_word = 0;
  team->t.t_mxcsr = 0;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  team->t.t_copypriv_data = NULL;

  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
  th->th.th_first_place = first;
  th->th.th_last_place = last;
  th->th.th_new_place = newp;
  if (newp != th->th.th_current_place) {
    if (__kmp_display_affinity && team->t.t_display_affinity != 1)
      team->t.t_display_affinity = 1;
    // Copy topology information associated with the new place
    th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
    th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
  }
}
// Distribute the worker threads over the primary thread's place partition.
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Do not partition places for the hidden helper team
  if (KMP_HIDDEN_HELPER_TEAM(team))
    return;
  // Copy the primary thread's place partition to the team struct
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  int num_masks = __kmp_affinity.num_masks;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));

  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have proc_bind_default; only one thread, nothing to do
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;
  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        __kmp_set_thread_place(team, th, first_place, last_place, place);

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, first_place, last_place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing: add an extra thread to this place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;
4936 case proc_bind_spread: {
4938 int n_th = team->t.t_nproc;
4941 if (first_place <= last_place) {
4942 n_places = last_place - first_place + 1;
4944 n_places = num_masks - first_place + last_place + 1;
4946 if (n_th <= n_places) {
4949 if (n_places != num_masks) {
4950 int S = n_places / n_th;
4951 int s_count, rem, gap, gap_ct;
4953 place = masters_place;
4954 rem = n_places - n_th * S;
4955 gap = rem ? n_th / rem : 1;
4958 if (update_master_only == 1)
4960 for (f = 0; f < thidx; f++) {
4961 kmp_info_t *th = team->t.t_threads[f];
4962 KMP_DEBUG_ASSERT(th != NULL);
4964 int fplace = place, nplace = place;
4966 while (s_count < S) {
4967 if (place == last_place) {
4968 place = first_place;
4969 }
else if (place == (num_masks - 1)) {
4976 if (rem && (gap_ct == gap)) {
4977 if (place == last_place) {
4978 place = first_place;
4979 }
else if (place == (num_masks - 1)) {
4987 __kmp_set_thread_place(team, th, fplace, place, nplace);
4990 if (place == last_place) {
4991 place = first_place;
4992 }
else if (place == (num_masks - 1)) {
4999 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5000 "partition = [%d,%d], num_masks: %u\n",
5001 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
5002 f, th->th.th_new_place, th->th.th_first_place,
5003 th->th.th_last_place, num_masks));
5009 double current =
static_cast<double>(masters_place);
5011 (
static_cast<double>(n_places + 1) /
static_cast<double>(n_th));
5016 if (update_master_only == 1)
5018 for (f = 0; f < thidx; f++) {
5019 first =
static_cast<int>(current);
5020 last =
static_cast<int>(current + spacing) - 1;
5021 KMP_DEBUG_ASSERT(last >= first);
5022 if (first >= n_places) {
5023 if (masters_place) {
5026 if (first == (masters_place + 1)) {
5027 KMP_DEBUG_ASSERT(f == n_th);
5030 if (last == masters_place) {
5031 KMP_DEBUG_ASSERT(f == (n_th - 1));
5035 KMP_DEBUG_ASSERT(f == n_th);
5040 if (last >= n_places) {
5041 last = (n_places - 1);
5046 KMP_DEBUG_ASSERT(0 <= first);
5047 KMP_DEBUG_ASSERT(n_places > first);
5048 KMP_DEBUG_ASSERT(0 <= last);
5049 KMP_DEBUG_ASSERT(n_places > last);
5050 KMP_DEBUG_ASSERT(last_place >= first_place);
5051 th = team->t.t_threads[f];
5052 KMP_DEBUG_ASSERT(th);
5053 __kmp_set_thread_place(team, th, first, last, place);
5055 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5056 "partition = [%d,%d], spacing = %.4f\n",
5057 __kmp_gtid_from_thread(team->t.t_threads[f]),
5058 team->t.t_id, f, th->th.th_new_place,
5059 th->th.th_first_place, th->th.th_last_place, spacing));
5063 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5065 int S, rem, gap, s_count;
5066 S = n_th / n_places;
5068 rem = n_th - (S * n_places);
5069 gap = rem > 0 ? n_places / rem : n_places;
5070 int place = masters_place;
5073 if (update_master_only == 1)
5075 for (f = 0; f < thidx; f++) {
5076 kmp_info_t *th = team->t.t_threads[f];
5077 KMP_DEBUG_ASSERT(th != NULL);
5079 __kmp_set_thread_place(team, th, place, place, place);
5082 if ((s_count == S) && rem && (gap_ct == gap)) {
5084 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5086 if (place == last_place) {
5087 place = first_place;
5088 }
else if (place == (num_masks - 1)) {
5096 }
else if (s_count == S) {
5097 if (place == last_place) {
5098 place = first_place;
5099 }
else if (place == (num_masks - 1)) {
5108 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5109 "partition = [%d,%d]\n",
5110 __kmp_gtid_from_thread(team->t.t_threads[f]),
5111 team->t.t_id, f, th->th.th_new_place,
5112 th->th.th_first_place, th->th.th_last_place));
5114 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}

#endif // KMP_AFFINITY_SUPPORTED
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5149#if KMP_NESTED_HOT_TEAMS
5150 kmp_hot_team_ptr_t *hot_teams;
5152 team = master->th.th_team;
5153 level = team->t.t_active_level;
5154 if (master->th.th_teams_microtask) {
5155 if (master->th.th_teams_size.nteams > 1 &&
5158 (microtask_t)__kmp_teams_master ||
5159 master->th.th_teams_level <
5166 if ((master->th.th_teams_size.nteams == 1 &&
5167 master->th.th_teams_level >= team->t.t_level) ||
5168 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5169 do_place_partition = 0;
5171 hot_teams = master->th.th_hot_teams;
5172 if (level < __kmp_hot_teams_max_level && hot_teams &&
5173 hot_teams[level].hot_team) {
5181 KMP_DEBUG_ASSERT(new_nproc == 1);
5185 if (use_hot_team && new_nproc > 1) {
5186 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5187#if KMP_NESTED_HOT_TEAMS
5188 team = hot_teams[level].hot_team;
5190 team = root->r.r_hot_team;
5193 if (__kmp_tasking_mode != tskm_immediate_exec) {
5194 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5195 "task_team[1] = %p before reinit\n",
5196 team->t.t_task_team[0], team->t.t_task_team[1]));
5200 if (team->t.t_nproc != new_nproc &&
5201 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5203 int old_nthr = team->t.t_nproc;
5204 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5209 if (do_place_partition == 0)
5210 team->t.t_proc_bind = proc_bind_default;
5214 if (team->t.t_nproc == new_nproc) {
5215 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5218 if (team->t.t_size_changed == -1) {
5219 team->t.t_size_changed = 1;
5221 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5225 kmp_r_sched_t new_sched = new_icvs->sched;
5227 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5229 __kmp_reinitialize_team(team, new_icvs,
5230 root->r.r_uber_thread->th.th_ident);
5232 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5233 team->t.t_threads[0], team));
5234 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5236#if KMP_AFFINITY_SUPPORTED
5237 if ((team->t.t_size_changed == 0) &&
5238 (team->t.t_proc_bind == new_proc_bind)) {
5239 if (new_proc_bind == proc_bind_spread) {
5240 if (do_place_partition) {
5242 __kmp_partition_places(team, 1);
5245 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5246 "proc_bind = %d, partition = [%d,%d]\n",
5247 team->t.t_id, new_proc_bind, team->t.t_first_place,
5248 team->t.t_last_place));
5250 if (do_place_partition) {
5251 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5252 __kmp_partition_places(team);
5256 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5258 }
else if (team->t.t_nproc > new_nproc) {
5260 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5263 team->t.t_size_changed = 1;
5264 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5267 __kmp_add_threads_to_team(team, new_nproc);
5269#if KMP_NESTED_HOT_TEAMS
5270 if (__kmp_hot_teams_mode == 0) {
5273 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5274 hot_teams[level].hot_team_nth = new_nproc;
5277 for (f = new_nproc; f < team->t.t_nproc; f++) {
5278 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5279 if (__kmp_tasking_mode != tskm_immediate_exec) {
5282 team->t.t_threads[f]->th.th_task_team = NULL;
5284 __kmp_free_thread(team->t.t_threads[f]);
5285 team->t.t_threads[f] = NULL;
5287#if KMP_NESTED_HOT_TEAMS
5292 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5293 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5294 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5295 for (
int b = 0; b < bs_last_barrier; ++b) {
5296 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5297 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5299 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5304 team->t.t_nproc = new_nproc;
5306 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5307 __kmp_reinitialize_team(team, new_icvs,
5308 root->r.r_uber_thread->th.th_ident);
5311 for (f = 0; f < new_nproc; ++f) {
5312 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5317 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5318 team->t.t_threads[0], team));
5320 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5323 for (f = 0; f < team->t.t_nproc; f++) {
5324 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5325 team->t.t_threads[f]->th.th_team_nproc ==
5330 if (do_place_partition) {
5331 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5332#if KMP_AFFINITY_SUPPORTED
5333 __kmp_partition_places(team);
5339 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5341 int old_nproc = team->t.t_nproc;
5342 team->t.t_size_changed = 1;
5344#if KMP_NESTED_HOT_TEAMS
5345 int avail_threads = hot_teams[level].hot_team_nth;
5346 if (new_nproc < avail_threads)
5347 avail_threads = new_nproc;
5348 kmp_info_t **other_threads = team->t.t_threads;
5349 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5353 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5354 for (b = 0; b < bs_last_barrier; ++b) {
5355 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5356 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5358 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5362 if (hot_teams[level].hot_team_nth >= new_nproc) {
5365 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5366 team->t.t_nproc = new_nproc;
5370 team->t.t_nproc = hot_teams[level].hot_team_nth;
5371 hot_teams[level].hot_team_nth = new_nproc;
5373 if (team->t.t_max_nproc < new_nproc) {
5375 __kmp_reallocate_team_arrays(team, new_nproc);
5376 __kmp_reinitialize_team(team, new_icvs, NULL);
5379#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5385 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5389 for (f = team->t.t_nproc; f < new_nproc; f++) {
5390 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5391 KMP_DEBUG_ASSERT(new_worker);
5392 team->t.t_threads[f] = new_worker;
5395 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5396 "join=%llu, plain=%llu\n",
5397 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5398 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5399 team->t.t_bar[bs_plain_barrier].b_arrived));
5403 kmp_balign_t *balign = new_worker->th.th_bar;
5404 for (b = 0; b < bs_last_barrier; ++b) {
5405 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5406 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5407 KMP_BARRIER_PARENT_FLAG);
5409 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5415#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5417 new_temp_affinity.restore();
5419#if KMP_NESTED_HOT_TEAMS
5422 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5425 __kmp_add_threads_to_team(team, new_nproc);
5429 __kmp_initialize_team(team, new_nproc, new_icvs,
5430 root->r.r_uber_thread->th.th_ident);
5433 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5434 for (f = 0; f < team->t.t_nproc; ++f)
5435 __kmp_initialize_info(team->t.t_threads[f], team, f,
5436 __kmp_gtid_from_tid(f, team));
5439 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5440 for (f = old_nproc; f < team->t.t_nproc; ++f)
5441 team->t.t_threads[f]->th.th_task_state = old_state;
5444 for (f = 0; f < team->t.t_nproc; ++f) {
5445 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5446 team->t.t_threads[f]->th.th_team_nproc ==
5451 if (do_place_partition) {
5452 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5453#if KMP_AFFINITY_SUPPORTED
5454 __kmp_partition_places(team);
5459 kmp_info_t *master = team->t.t_threads[0];
5460 if (master->th.th_teams_microtask) {
5461 for (f = 1; f < new_nproc; ++f) {
5463 kmp_info_t *thr = team->t.t_threads[f];
5464 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5465 thr->th.th_teams_level = master->th.th_teams_level;
5466 thr->th.th_teams_size = master->th.th_teams_size;
5469#if KMP_NESTED_HOT_TEAMS
5473 for (f = 1; f < new_nproc; ++f) {
5474 kmp_info_t *thr = team->t.t_threads[f];
5476 kmp_balign_t *balign = thr->th.th_bar;
5477 for (b = 0; b < bs_last_barrier; ++b) {
5478 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5479 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5481 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5489 __kmp_alloc_argv_entries(argc, team, TRUE);
5490 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5494 KF_TRACE(10, (
" hot_team = %p\n", team));
5497 if (__kmp_tasking_mode != tskm_immediate_exec) {
5498 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5499 "task_team[1] = %p after reinit\n",
5500 team->t.t_task_team[0], team->t.t_task_team[1]));
5505 __ompt_team_assign_id(team, ompt_parallel_data);
5515 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5518 if (team->t.t_max_nproc >= max_nproc) {
5520 __kmp_team_pool = team->t.t_next_pool;
5522 if (max_nproc > 1 &&
5523 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5525 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5530 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5532 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5533 "task_team[1] %p to NULL\n",
5534 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5535 team->t.t_task_team[0] = NULL;
5536 team->t.t_task_team[1] = NULL;
5539 __kmp_alloc_argv_entries(argc, team, TRUE);
5540 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5543 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5544 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5547 for (b = 0; b < bs_last_barrier; ++b) {
5548 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5550 team->t.t_bar[b].b_master_arrived = 0;
5551 team->t.t_bar[b].b_team_arrived = 0;
5556 team->t.t_proc_bind = new_proc_bind;
5558 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5562 __ompt_team_assign_id(team, ompt_parallel_data);
5574 team = __kmp_reap_team(team);
5575 __kmp_team_pool = team;
5580 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5583 team->t.t_max_nproc = max_nproc;
5584 if (max_nproc > 1 &&
5585 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5587 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5592 __kmp_allocate_team_arrays(team, max_nproc);
5594 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5595 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5597 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5599 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5600 team->t.t_task_team[0] = NULL;
5602 team->t.t_task_team[1] = NULL;
5605 if (__kmp_storage_map) {
5606 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5610 __kmp_alloc_argv_entries(argc, team, FALSE);
5611 team->t.t_argc = argc;
5614 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5615 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5618 for (b = 0; b < bs_last_barrier; ++b) {
5619 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5621 team->t.t_bar[b].b_master_arrived = 0;
5622 team->t.t_bar[b].b_team_arrived = 0;
5627 team->t.t_proc_bind = new_proc_bind;
5630 __ompt_team_assign_id(team, ompt_parallel_data);
5631 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
  return team;
}
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
5660#if KMP_NESTED_HOT_TEAMS
5663 level = team->t.t_active_level - 1;
5664 if (master->th.th_teams_microtask) {
5665 if (master->th.th_teams_size.nteams > 1) {
5669 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5670 master->th.th_teams_level == team->t.t_level) {
5676 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5678 if (level < __kmp_hot_teams_max_level) {
5679 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5686 TCW_SYNC_PTR(team->t.t_pkfn,
5689 team->t.t_copyin_counter = 0;
5694 if (!use_hot_team) {
5695 if (__kmp_tasking_mode != tskm_immediate_exec) {
5697 for (f = 1; f < team->t.t_nproc; ++f) {
5698 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5699 kmp_info_t *th = team->t.t_threads[f];
5700 volatile kmp_uint32 *state = &th->th.th_reap_state;
5701 while (*state != KMP_SAFE_TO_REAP) {
5705 if (!__kmp_is_thread_alive(th, &ecode)) {
5706 *state = KMP_SAFE_TO_REAP;
5711 if (th->th.th_sleep_loc)
5712 __kmp_null_resume_wrapper(th);
5719 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5720 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5721 if (task_team != NULL) {
5722 for (f = 0; f < team->t.t_nproc; ++f) {
5723 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5724 team->t.t_threads[f]->th.th_task_team = NULL;
5728 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5729 __kmp_get_gtid(), task_team, team->t.t_id));
5730#if KMP_NESTED_HOT_TEAMS
5731 __kmp_free_task_team(master, task_team);
5733 team->t.t_task_team[tt_idx] = NULL;
5739 team->t.t_parent = NULL;
5740 team->t.t_level = 0;
5741 team->t.t_active_level = 0;
5744 for (f = 1; f < team->t.t_nproc; ++f) {
5745 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5746 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5747 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5750 __kmp_free_thread(team->t.t_threads[f]);
5753 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5756 team->t.b->go_release();
5757 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5758 for (f = 1; f < team->t.t_nproc; ++f) {
5759 if (team->t.b->sleep[f].sleep) {
5760 __kmp_atomic_resume_64(
5761 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5762 (kmp_atomic_flag_64<> *)NULL);
5767 for (
int f = 1; f < team->t.t_nproc; ++f) {
5768 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5774 for (f = 1; f < team->t.t_nproc; ++f) {
5775 team->t.t_threads[f] = NULL;
5778 if (team->t.t_max_nproc > 1 &&
5779 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5780 distributedBarrier::deallocate(team->t.b);
5785 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5786 __kmp_team_pool = (
volatile kmp_team_t *)team;
5789 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5790 team->t.t_threads[1]->th.th_cg_roots);
5791 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5793 for (f = 1; f < team->t.t_nproc; ++f) {
5794 kmp_info_t *thr = team->t.t_threads[f];
5795 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5796 thr->th.th_cg_roots->cg_root == thr);
5798 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5799 thr->th.th_cg_roots = tmp->up;
5800 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5801 " up to node %p. cg_nthreads was %d\n",
5802 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5803 int i = tmp->cg_nthreads--;
5808 if (thr->th.th_cg_roots)
5809 thr->th.th_current_task->td_icvs.thread_limit =
5810 thr->th.th_cg_roots->cg_thread_limit;
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* free the team's storage */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
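
// __kmp_free_thread: return a worker to the common thread pool. The pool is
// kept sorted by ascending gtid; __kmp_thread_pool_insert_pt caches the
// insertion point to shorten the scan.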
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving the thread to the pool, switch it to wait on its own b_go
  // flag and detach it from its team.
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5888 TCW_PTR(this_th->th.th_team, NULL);
5889 TCW_PTR(this_th->th.th_root, NULL);
5890 TCW_PTR(this_th->th.th_dispatch, NULL);
5892 while (this_th->th.th_cg_roots) {
5893 this_th->th.th_cg_roots->cg_nthreads--;
5894 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5895 " %p of thread %p to %d\n",
5896 this_th, this_th->th.th_cg_roots,
5897 this_th->th.th_cg_roots->cg_root,
5898 this_th->th.th_cg_roots->cg_nthreads));
5899 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5900 if (tmp->cg_root == this_th) {
5901 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5903 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5904 this_th->th.th_cg_roots = tmp->up;
5907 if (tmp->cg_nthreads == 0) {
5910 this_th->th.th_cg_roots = NULL;
5920 __kmp_free_implicit_task(this_th);
5921 this_th->th.th_current_task = NULL;
5925 gtid = this_th->th.th_info.ds.ds_gtid;
5926 if (__kmp_thread_pool_insert_pt != NULL) {
5927 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5928 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5929 __kmp_thread_pool_insert_pt = NULL;
5938 if (__kmp_thread_pool_insert_pt != NULL) {
5939 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5941 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5943 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5944 scan = &((*scan)->th.th_next_pool))
5949 TCW_PTR(this_th->th.th_next_pool, *scan);
5950 __kmp_thread_pool_insert_pt = *scan = this_th;
5951 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5952 (this_th->th.th_info.ds.ds_gtid <
5953 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5954 TCW_4(this_th->th.th_in_pool, TRUE);
5955 __kmp_suspend_initialize_thread(this_th);
5956 __kmp_lock_suspend_mx(this_th);
5957 if (this_th->th.th_active == TRUE) {
5958 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5959 this_th->th.th_active_in_pool = TRUE;
5963 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5966 __kmp_unlock_suspend_mx(this_th);
5968 TCW_4(__kmp_nth, __kmp_nth - 1);
#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to the user setting if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
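
// __kmp_launch_thread: main loop of a worker thread. Each iteration waits on
// the fork barrier for work, invokes the team's microtask via t_invoke, and
// then waits on the join barrier, until __kmp_global.g.g_done is set.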
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }
6006 if (ompd_state & OMPD_ENABLE_BP)
6007 ompd_bp_thread_begin();
6011 ompt_data_t *thread_data =
nullptr;
6012 if (ompt_enabled.enabled) {
6013 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6014 *thread_data = ompt_data_none;
6016 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6017 this_thr->th.ompt_thread_info.wait_id = 0;
6018 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6019 this_thr->th.ompt_thread_info.parallel_flags = 0;
6020 if (ompt_enabled.ompt_callback_thread_begin) {
6021 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6022 ompt_thread_worker, thread_data);
6024 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6029 while (!TCR_4(__kmp_global.g.g_done)) {
6030 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6034 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6037 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6040 if (ompt_enabled.enabled) {
6041 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6045 pteam = &this_thr->th.th_team;
6048 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6050 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6053 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6054 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6055 (*pteam)->t.t_pkfn));
6057 updateHWFPControl(*pteam);
6060 if (ompt_enabled.enabled) {
6061 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6065 rc = (*pteam)->t.t_invoke(gtid);
6069 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6070 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6071 (*pteam)->t.t_pkfn));
6074 if (ompt_enabled.enabled) {
6076 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6078 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6082 __kmp_join_barrier(gtid);
6087 if (ompd_state & OMPD_ENABLE_BP)
6088 ompd_bp_thread_end();
6092 if (ompt_enabled.ompt_callback_thread_end) {
6093 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6097 this_thr->th.th_task_team = NULL;
6099 __kmp_common_destroy_gtid(gtid);
6101 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6104#if OMP_PROFILING_SUPPORT
6105 llvm::timeTraceProfilerFinishThread();
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in thread-local storage because 0 is
     reserved for the "nothing stored" case. */
  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
  __kmp_close_console();
}
6165static void __kmp_reap_thread(kmp_info_t *thread,
int is_root) {
6170 KMP_DEBUG_ASSERT(thread != NULL);
6172 gtid = thread->th.th_info.ds.ds_gtid;
6175 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6178 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6180 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6182 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6184 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6188 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6190 __kmp_release_64(&flag);
6195 __kmp_reap_worker(thread);
6207 if (thread->th.th_active_in_pool) {
6208 thread->th.th_active_in_pool = FALSE;
6209 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6210 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6214 __kmp_free_implicit_task(thread);
6218 __kmp_free_fast_memory(thread);
6221 __kmp_suspend_uninitialize_thread(thread);
6223 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6224 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6229#ifdef KMP_ADJUST_BLOCKTIME
6232 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6233 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6234 if (__kmp_nth <= __kmp_avail_proc) {
6235 __kmp_zero_bt = FALSE;
6241 if (__kmp_env_consistency_check) {
6242 if (thread->th.th_cons) {
6243 __kmp_free_cons_stack(thread->th.th_cons);
6244 thread->th.th_cons = NULL;
6248 if (thread->th.th_pri_common != NULL) {
6249 __kmp_free(thread->th.th_pri_common);
6250 thread->th.th_pri_common = NULL;
6253 if (thread->th.th_task_state_memo_stack != NULL) {
6254 __kmp_free(thread->th.th_task_state_memo_stack);
6255 thread->th.th_task_state_memo_stack = NULL;
6259 if (thread->th.th_local.bget_data != NULL) {
6260 __kmp_finalize_bget(thread);
6264#if KMP_AFFINITY_SUPPORTED
6265 if (thread->th.th_affin_mask != NULL) {
6266 KMP_CPU_FREE(thread->th.th_affin_mask);
6267 thread->th.th_affin_mask = NULL;
6271#if KMP_USE_HIER_SCHED
6272 if (thread->th.th_hier_bar_data != NULL) {
6273 __kmp_free(thread->th.th_hier_bar_data);
6274 thread->th.th_hier_bar_data = NULL;
6278 __kmp_reap_team(thread->th.th_serial_team);
6279 thread->th.th_serial_team = NULL;
6286static void __kmp_itthash_clean(kmp_info_t *th) {
6288 if (__kmp_itt_region_domains.count > 0) {
6289 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6290 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6292 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6293 __kmp_thread_free(th, bucket);
6298 if (__kmp_itt_barrier_domains.count > 0) {
6299 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6300 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6302 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6303 __kmp_thread_free(th, bucket);
6311static void __kmp_internal_end(
void) {
6315 __kmp_unregister_library();
6322 __kmp_reclaim_dead_roots();
6326 for (i = 0; i < __kmp_threads_capacity; i++)
6328 if (__kmp_root[i]->r.r_active)
6331 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6333 if (i < __kmp_threads_capacity) {
6345 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6346 if (TCR_4(__kmp_init_monitor)) {
6347 __kmp_reap_monitor(&__kmp_monitor);
6348 TCW_4(__kmp_init_monitor, 0);
6350 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6351 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6357 for (i = 0; i < __kmp_threads_capacity; i++) {
6358 if (__kmp_root[i]) {
6361 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6370 while (__kmp_thread_pool != NULL) {
6372 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6373 __kmp_thread_pool = thread->th.th_next_pool;
6375 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6376 thread->th.th_next_pool = NULL;
6377 thread->th.th_in_pool = FALSE;
6378 __kmp_reap_thread(thread, 0);
6380 __kmp_thread_pool_insert_pt = NULL;
6383 while (__kmp_team_pool != NULL) {
6385 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6386 __kmp_team_pool = team->t.t_next_pool;
6388 team->t.t_next_pool = NULL;
6389 __kmp_reap_team(team);
6392 __kmp_reap_task_teams();
6399 for (i = 0; i < __kmp_threads_capacity; i++) {
6400 kmp_info_t *thr = __kmp_threads[i];
6401 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6406 for (i = 0; i < __kmp_threads_capacity; ++i) {
6413 TCW_SYNC_4(__kmp_init_common, FALSE);
6415 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6423 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6424 if (TCR_4(__kmp_init_monitor)) {
6425 __kmp_reap_monitor(&__kmp_monitor);
6426 TCW_4(__kmp_init_monitor, 0);
6428 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6429 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6432 TCW_4(__kmp_init_gtid, FALSE);
6441void __kmp_internal_end_library(
int gtid_req) {
6448 if (__kmp_global.g.g_abort) {
6449 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6453 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6454 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6459 if (TCR_4(__kmp_init_hidden_helper) &&
6460 !TCR_4(__kmp_hidden_helper_team_done)) {
6461 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6463 __kmp_hidden_helper_main_thread_release();
6465 __kmp_hidden_helper_threads_deinitz_wait();
6471 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6473 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6474 if (gtid == KMP_GTID_SHUTDOWN) {
6475 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6476 "already shutdown\n"));
6478 }
else if (gtid == KMP_GTID_MONITOR) {
6479 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6480 "registered, or system shutdown\n"));
6482 }
else if (gtid == KMP_GTID_DNE) {
6483 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6486 }
else if (KMP_UBER_GTID(gtid)) {
6488 if (__kmp_root[gtid]->r.r_active) {
6489 __kmp_global.g.g_abort = -1;
6490 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6491 __kmp_unregister_library();
6493 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6497 __kmp_itthash_clean(__kmp_threads[gtid]);
6500 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6501 __kmp_unregister_root_current_thread(gtid);
6508#ifdef DUMP_DEBUG_ON_EXIT
6509 if (__kmp_debug_buf)
6510 __kmp_dump_debug_buffer();
6515 __kmp_unregister_library();
6520 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6523 if (__kmp_global.g.g_abort) {
6524 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6526 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6529 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6530 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6539 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6542 __kmp_internal_end();
6544 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6545 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6547 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6549#ifdef DUMP_DEBUG_ON_EXIT
6550 if (__kmp_debug_buf)
6551 __kmp_dump_debug_buffer();
6555 __kmp_close_console();
6558 __kmp_fini_allocator();
6562void __kmp_internal_end_thread(
int gtid_req) {
6571 if (__kmp_global.g.g_abort) {
6572 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6576 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6577 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6582 if (TCR_4(__kmp_init_hidden_helper) &&
6583 !TCR_4(__kmp_hidden_helper_team_done)) {
6584 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6586 __kmp_hidden_helper_main_thread_release();
6588 __kmp_hidden_helper_threads_deinitz_wait();
6595 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6597 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6598 if (gtid == KMP_GTID_SHUTDOWN) {
6599 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6600 "already shutdown\n"));
6602 }
else if (gtid == KMP_GTID_MONITOR) {
6603 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6604 "registered, or system shutdown\n"));
6606 }
else if (gtid == KMP_GTID_DNE) {
6607 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6611 }
else if (KMP_UBER_GTID(gtid)) {
6613 if (__kmp_root[gtid]->r.r_active) {
6614 __kmp_global.g.g_abort = -1;
6615 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6617 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6621 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6623 __kmp_unregister_root_current_thread(gtid);
6627 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6630 __kmp_threads[gtid]->th.th_task_team = NULL;
6634 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6640 if (__kmp_pause_status != kmp_hard_paused)
6644 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6649 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6652 if (__kmp_global.g.g_abort) {
6653 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6655 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6658 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6659 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6670 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6672 for (i = 0; i < __kmp_threads_capacity; ++i) {
6673 if (KMP_UBER_GTID(i)) {
6676 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6677 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6678 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6685 __kmp_internal_end();
6687 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6688 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6690 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
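
// Library registration: on startup each runtime instance publishes a value of
// the form "%p-%lx-%s" (address of __kmp_registration_flag, the flag value,
// and KMP_LIBRARY_FILE) under the name returned by __kmp_reg_status_name(),
// using an environment variable, a POSIX shared-memory segment, or a /tmp
// file, whichever is available. A hypothetical example of such a value would
// be "0x7f3a2c0010a0-cafe1234-libomp.so". A second copy of the runtime finds
// the existing value, fails to match it against its own string, and reports a
// DuplicateLibrary error unless KMP_DUPLICATE_LIB_OK is set.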
6701static long __kmp_registration_flag = 0;
6703static char *__kmp_registration_str = NULL;
6706static inline char *__kmp_reg_status_name() {
6712#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6713 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d_%d", (
int)getpid(),
6716 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d", (
int)getpid());
#if defined(KMP_USE_SHM)
bool __kmp_shm_available = false;
bool __kmp_tmp_available = false;
// If /dev/shm is not accessible, we fall back to a file under /tmp.
char *temp_reg_status_file_name = nullptr;
#endif
6727void __kmp_register_library_startup(
void) {
6729 char *name = __kmp_reg_status_name();
6735#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6736 __kmp_initialize_system_tick();
6738 __kmp_read_system_time(&time.dtime);
6739 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6740 __kmp_registration_str =
6741 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6742 __kmp_registration_flag, KMP_LIBRARY_FILE);
6744 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6745 __kmp_registration_str));
6751#if defined(KMP_USE_SHM)
6752 char *shm_name =
nullptr;
6753 char *data1 =
nullptr;
6754 __kmp_shm_available = __kmp_detect_shm();
6755 if (__kmp_shm_available) {
6757 shm_name = __kmp_str_format(
"/%s", name);
6758 int shm_preexist = 0;
6759 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6760 if ((fd1 == -1) && (errno == EEXIST)) {
6763 fd1 = shm_open(shm_name, O_RDWR, 0666);
6765 KMP_WARNING(FunctionError,
"Can't open SHM");
6766 __kmp_shm_available =
false;
6771 if (__kmp_shm_available && shm_preexist == 0) {
6772 if (ftruncate(fd1, SHM_SIZE) == -1) {
6773 KMP_WARNING(FunctionError,
"Can't set size of SHM");
6774 __kmp_shm_available =
false;
6777 if (__kmp_shm_available) {
6778 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6780 if (data1 == MAP_FAILED) {
6781 KMP_WARNING(FunctionError,
"Can't map SHM");
6782 __kmp_shm_available =
false;
6785 if (__kmp_shm_available) {
6786 if (shm_preexist == 0) {
6787 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6790 value = __kmp_str_format(
"%s", data1);
6791 munmap(data1, SHM_SIZE);
6796 if (!__kmp_shm_available)
6797 __kmp_tmp_available = __kmp_detect_tmp();
6798 if (!__kmp_shm_available && __kmp_tmp_available) {
6805 temp_reg_status_file_name = __kmp_str_format(
"/tmp/%s", name);
6806 int tmp_preexist = 0;
6807 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6808 if ((fd1 == -1) && (errno == EEXIST)) {
6811 fd1 = open(temp_reg_status_file_name, O_RDWR, 0666);
6813 KMP_WARNING(FunctionError,
"Can't open TEMP");
6814 __kmp_tmp_available =
false;
6819 if (__kmp_tmp_available && tmp_preexist == 0) {
6821 if (ftruncate(fd1, SHM_SIZE) == -1) {
6822 KMP_WARNING(FunctionError,
"Can't set size of /tmp file");
6823 __kmp_tmp_available =
false;
6826 if (__kmp_tmp_available) {
6827 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6829 if (data1 == MAP_FAILED) {
6830 KMP_WARNING(FunctionError,
"Can't map /tmp");
6831 __kmp_tmp_available =
false;
6834 if (__kmp_tmp_available) {
6835 if (tmp_preexist == 0) {
6836 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6839 value = __kmp_str_format(
"%s", data1);
6840 munmap(data1, SHM_SIZE);
6845 if (!__kmp_shm_available && !__kmp_tmp_available) {
6848 __kmp_env_set(name, __kmp_registration_str, 0);
6850 value = __kmp_env_get(name);
6854 __kmp_env_set(name, __kmp_registration_str, 0);
6856 value = __kmp_env_get(name);
6859 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6866 char *flag_addr_str = NULL;
6867 char *flag_val_str = NULL;
6868 char const *file_name = NULL;
6869 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6870 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6873 unsigned long *flag_addr = 0;
6874 unsigned long flag_val = 0;
6875 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6876 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6877 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6881 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6895 file_name =
"unknown library";
6900 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6901 if (!__kmp_str_match_true(duplicate_ok)) {
6903 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6904 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6906 KMP_INTERNAL_FREE(duplicate_ok);
6907 __kmp_duplicate_library_ok = 1;
6912#if defined(KMP_USE_SHM)
6913 if (__kmp_shm_available) {
6914 shm_unlink(shm_name);
6915 }
else if (__kmp_tmp_available) {
6916 unlink(temp_reg_status_file_name);
6919 __kmp_env_unset(name);
6923 __kmp_env_unset(name);
6927 KMP_DEBUG_ASSERT(0);
6931 KMP_INTERNAL_FREE((
void *)value);
6932#if defined(KMP_USE_SHM)
6934 KMP_INTERNAL_FREE((
void *)shm_name);
6937 KMP_INTERNAL_FREE((
void *)name);
6941void __kmp_unregister_library(
void) {
6943 char *name = __kmp_reg_status_name();
6946#if defined(KMP_USE_SHM)
6947 char *shm_name =
nullptr;
6949 if (__kmp_shm_available) {
6950 shm_name = __kmp_str_format(
"/%s", name);
6951 fd1 = shm_open(shm_name, O_RDONLY, 0666);
6953 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6954 if (data1 != MAP_FAILED) {
6955 value = __kmp_str_format(
"%s", data1);
6956 munmap(data1, SHM_SIZE);
6960 }
else if (__kmp_tmp_available) {
6961 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6963 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6964 if (data1 != MAP_FAILED) {
6965 value = __kmp_str_format(
"%s", data1);
6966 munmap(data1, SHM_SIZE);
6971 value = __kmp_env_get(name);
6974 value = __kmp_env_get(name);
6977 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6978 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6979 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6981#if defined(KMP_USE_SHM)
6982 if (__kmp_shm_available) {
6983 shm_unlink(shm_name);
6984 }
else if (__kmp_tmp_available) {
6985 unlink(temp_reg_status_file_name);
6987 __kmp_env_unset(name);
6990 __kmp_env_unset(name);
6994#if defined(KMP_USE_SHM)
6996 KMP_INTERNAL_FREE(shm_name);
6997 if (temp_reg_status_file_name)
6998 KMP_INTERNAL_FREE(temp_reg_status_file_name);
7001 KMP_INTERNAL_FREE(__kmp_registration_str);
7002 KMP_INTERNAL_FREE(value);
7003 KMP_INTERNAL_FREE(name);
7005 __kmp_registration_flag = 0;
7006 __kmp_registration_str = NULL;
#if KMP_MIC_SUPPORTED

// Detect the Intel(R) Xeon Phi(TM) generation from the CPUID signature.
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#ifndef AT_INTELPHIUSERMWAIT
// Fallback definition when the system headers do not provide this auxv type.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// Weak fallback for systems whose libc does not provide getauxval().
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
7053static void __kmp_user_level_mwait_init() {
7058 if (__kmp_mic_type == mic3) {
7059 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7060 if ((res & 0x1) || __kmp_user_level_mwait) {
7061 __kmp_mwait_enabled = TRUE;
7062 if (__kmp_user_level_mwait) {
7063 KMP_INFORM(EnvMwaitWarn);
7066 __kmp_mwait_enabled = FALSE;
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
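
// __kmp_do_serial_initialize: one-time process-wide setup. It validates basic
// type sizes, initializes the global and atomic locks, establishes barrier
// branch-bit and pattern defaults, reads the environment, sizes
// __kmp_threads/__kmp_root, and registers the initial (uber) root thread.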
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7095 __kmp_validate_locks();
7097#if ENABLE_LIBOMPTARGET
7099 __kmp_init_omptarget();
7103 __kmp_init_allocator();
7109 if (__kmp_need_register_serial)
7110 __kmp_register_library_startup();
7113 if (TCR_4(__kmp_global.g.g_done)) {
7114 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7117 __kmp_global.g.g_abort = 0;
7118 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7121#if KMP_USE_ADAPTIVE_LOCKS
7122#if KMP_DEBUG_ADAPTIVE_LOCKS
7123 __kmp_init_speculative_stats();
7126#if KMP_STATS_ENABLED
7129 __kmp_init_lock(&__kmp_global_lock);
7130 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7131 __kmp_init_lock(&__kmp_debug_lock);
7132 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7133 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7134 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7135 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7136 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7137 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7138 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7139 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7140 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7141 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7142 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7143 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7144 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7145 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7146 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7148 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7150 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7154 __kmp_runtime_initialize();
7156#if KMP_MIC_SUPPORTED
7157 __kmp_check_mic_type();
7164 __kmp_abort_delay = 0;
7168 __kmp_dflt_team_nth_ub = __kmp_xproc;
7169 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7170 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7172 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7173 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7175 __kmp_max_nth = __kmp_sys_max_nth;
7176 __kmp_cg_max_nth = __kmp_sys_max_nth;
7177 __kmp_teams_max_nth = __kmp_xproc;
7178 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7179 __kmp_teams_max_nth = __kmp_sys_max_nth;
7184 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7186 __kmp_monitor_wakeups =
7187 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7188 __kmp_bt_intervals =
7189 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7192 __kmp_library = library_throughput;
7194 __kmp_static = kmp_sch_static_balanced;
7201#if KMP_FAST_REDUCTION_BARRIER
7202#define kmp_reduction_barrier_gather_bb ((int)1)
7203#define kmp_reduction_barrier_release_bb ((int)1)
7204#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7205#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7207 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7208 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7209 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7210 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7211 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7212#if KMP_FAST_REDUCTION_BARRIER
7213 if (i == bs_reduction_barrier) {
7215 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7216 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7217 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7218 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7222#if KMP_FAST_REDUCTION_BARRIER
7223#undef kmp_reduction_barrier_release_pat
7224#undef kmp_reduction_barrier_gather_pat
7225#undef kmp_reduction_barrier_release_bb
7226#undef kmp_reduction_barrier_gather_bb
7228#if KMP_MIC_SUPPORTED
7229 if (__kmp_mic_type == mic2) {
7231 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7232 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7234 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7235 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7237#if KMP_FAST_REDUCTION_BARRIER
7238 if (__kmp_mic_type == mic2) {
7239 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7240 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7247 __kmp_env_checks = TRUE;
7249 __kmp_env_checks = FALSE;
7253 __kmp_foreign_tp = TRUE;
7255 __kmp_global.g.g_dynamic = FALSE;
7256 __kmp_global.g.g_dynamic_mode = dynamic_default;
7258 __kmp_init_nesting_mode();
7260 __kmp_env_initialize(NULL);
7262#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7263 __kmp_user_level_mwait_init();
7267 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7268 if (__kmp_str_match_true(val)) {
7269 kmp_str_buf_t buffer;
7270 __kmp_str_buf_init(&buffer);
7271 __kmp_i18n_dump_catalog(&buffer);
7272 __kmp_printf(
"%s", buffer.str);
7273 __kmp_str_buf_free(&buffer);
7275 __kmp_env_free(&val);
7278 __kmp_threads_capacity =
7279 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7281 __kmp_tp_capacity = __kmp_default_tp_capacity(
7282 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7287 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7288 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7289 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7290 __kmp_thread_pool = NULL;
7291 __kmp_thread_pool_insert_pt = NULL;
7292 __kmp_team_pool = NULL;
7299 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7301 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7302 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
7303 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7306 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7308 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7313 gtid = __kmp_register_root(TRUE);
7314 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7315 KMP_ASSERT(KMP_UBER_GTID(gtid));
7316 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7320 __kmp_common_initialize();
7324 __kmp_register_atfork();
#if !KMP_DYNAMIC_LIB ||                                                        \
    ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
  /* Invoke the exit handler when the program finishes (static library, or
     dynamic library on macOS*). */
  int rc = atexit(__kmp_internal_end_atexit);
  if (rc != 0) {
    __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                __kmp_msg_null);
  }
#endif
7341#if KMP_HANDLE_SIGNALS
7347 __kmp_install_signals(FALSE);
7350 __kmp_install_signals(TRUE);
7355 __kmp_init_counter++;
7357 __kmp_init_serial = TRUE;
7359 if (__kmp_version) {
7360 __kmp_print_version_1();
7363 if (__kmp_settings) {
7367 if (__kmp_display_env || __kmp_display_env_verbose) {
7368 __kmp_env_print_2();
7377 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
7380void __kmp_serial_initialize(
void) {
7381 if (__kmp_init_serial) {
7384 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7385 if (__kmp_init_serial) {
7386 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7389 __kmp_do_serial_initialize();
7390 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7393static void __kmp_do_middle_initialize(
void) {
7395 int prev_dflt_team_nth;
7397 if (!__kmp_init_serial) {
7398 __kmp_do_serial_initialize();
7401 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7403 if (UNLIKELY(!__kmp_need_register_serial)) {
7406 __kmp_register_library_startup();
7411 prev_dflt_team_nth = __kmp_dflt_team_nth;
7413#if KMP_AFFINITY_SUPPORTED
7416 __kmp_affinity_initialize(__kmp_affinity);
7420 KMP_ASSERT(__kmp_xproc > 0);
7421 if (__kmp_avail_proc == 0) {
7422 __kmp_avail_proc = __kmp_xproc;
7428 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7429 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7434 if (__kmp_dflt_team_nth == 0) {
7435#ifdef KMP_DFLT_NTH_CORES
7437 __kmp_dflt_team_nth = __kmp_ncores;
7438 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7439 "__kmp_ncores (%d)\n",
7440 __kmp_dflt_team_nth));
7443 __kmp_dflt_team_nth = __kmp_avail_proc;
7444 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7445 "__kmp_avail_proc(%d)\n",
7446 __kmp_dflt_team_nth));
7450 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7451 __kmp_dflt_team_nth = KMP_MIN_NTH;
7453 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7454 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7457 if (__kmp_nesting_mode > 0)
7458 __kmp_set_nesting_mode_threads();
7462 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7464 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7469 for (i = 0; i < __kmp_threads_capacity; i++) {
7470 kmp_info_t *thread = __kmp_threads[i];
7473 if (thread->th.th_current_task->td_icvs.nproc != 0)
7476 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7481 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7482 __kmp_dflt_team_nth));
7484#ifdef KMP_ADJUST_BLOCKTIME
7486 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7487 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7488 if (__kmp_nth > __kmp_avail_proc) {
7489 __kmp_zero_bt = TRUE;
7495 TCW_SYNC_4(__kmp_init_middle, TRUE);
7497 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
7500 void __kmp_middle_initialize(void) {
7501 if (__kmp_init_middle) {
7504 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7505 if (__kmp_init_middle) {
7506 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7509 __kmp_do_middle_initialize();
7510 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
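// Parallel initialization: performed lazily by the primary thread at the first
// fork. Captures the initial x87/MXCSR state on x86 so workers can inherit it,
// installs signal handlers, and selects the default dynamic adjustment mode.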
7513 void __kmp_parallel_initialize(void) {
7514 int gtid = __kmp_entry_gtid();
7517 if (TCR_4(__kmp_init_parallel))
7519 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7520 if (TCR_4(__kmp_init_parallel)) {
7521 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7526 if (TCR_4(__kmp_global.g.g_done)) {
7529   KA_TRACE(10, ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7530   __kmp_infinite_loop();
7536 if (!__kmp_init_middle) {
7537 __kmp_do_middle_initialize();
7539 __kmp_assign_root_init_mask();
7540 __kmp_resume_if_hard_paused();
7543 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7544 KMP_ASSERT(KMP_UBER_GTID(gtid));
7546#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7549 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7550 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7551 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7555#if KMP_HANDLE_SIGNALS
7557 __kmp_install_signals(TRUE);
7561 __kmp_suspend_initialize();
7563#if defined(USE_LOAD_BALANCE)
7564 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7565 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7568 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7569 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7573 if (__kmp_version) {
7574 __kmp_print_version_2();
7578 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7581 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7583 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
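// Hidden helper initialization: brings up the hidden helper thread team used
// for hidden helper tasks, guarded by the same bootstrap init lock.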
7586void __kmp_hidden_helper_initialize() {
7587 if (TCR_4(__kmp_init_hidden_helper))
7591 if (!TCR_4(__kmp_init_parallel))
7592 __kmp_parallel_initialize();
7596 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7597 if (TCR_4(__kmp_init_hidden_helper)) {
7598 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7602#if KMP_AFFINITY_SUPPORTED
7606 if (!__kmp_hh_affinity.flags.initialized)
7607 __kmp_affinity_initialize(__kmp_hh_affinity);
7611 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7615 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7618 __kmp_do_initialize_hidden_helper_threads();
7621 __kmp_hidden_helper_threads_initz_wait();
7624 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7626 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
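// Per-worker bookkeeping run immediately before and after a thread invokes the
// outlined parallel-region microtask: reset dispatch state, and push/pop the
// parallel construct for the consistency checker.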
7631 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7633 kmp_disp_t *dispatch;
7638 this_thr->th.th_local.this_construct = 0;
7640 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7642 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7643 KMP_DEBUG_ASSERT(dispatch);
7644 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7648 dispatch->th_disp_index = 0;
7649 dispatch->th_doacross_buf_idx = 0;
7650 if (__kmp_env_consistency_check)
7651 __kmp_push_parallel(gtid, team->t.t_ident);
7656 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7658 if (__kmp_env_consistency_check)
7659 __kmp_pop_parallel(gtid, team->t.t_ident);
7661 __kmp_finish_implicit_task(this_thr);
7664 int __kmp_invoke_task_func(int gtid) {
7666 int tid = __kmp_tid_from_gtid(gtid);
7667 kmp_info_t *this_thr = __kmp_threads[gtid];
7668 kmp_team_t *team = this_thr->th.th_team;
7670 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7672 if (__itt_stack_caller_create_ptr) {
7674 if (team->t.t_stack_id != NULL) {
7675 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7677 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7678 __kmp_itt_stack_callee_enter(
7679 (__itt_caller)team->t.t_parent->t.t_stack_id);
7683#if INCLUDE_SSC_MARKS
7684 SSC_MARK_INVOKING();
7689 void **exit_frame_p;
7690 ompt_data_t *my_task_data;
7691 ompt_data_t *my_parallel_data;
7694 if (ompt_enabled.enabled) {
7695 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7696 .ompt_task_info.frame.exit_frame.ptr);
7698 exit_frame_p = &dummy;
7702 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7703 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7704 if (ompt_enabled.ompt_callback_implicit_task) {
7705 ompt_team_size = team->t.t_nproc;
7706 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7707 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7708 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7709 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7713#if KMP_STATS_ENABLED
7715 if (previous_state == stats_state_e::TEAMS_REGION) {
7716 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7718 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7720 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7723 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7724                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
7731 *exit_frame_p = NULL;
7732 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7735#if KMP_STATS_ENABLED
7736 if (previous_state == stats_state_e::TEAMS_REGION) {
7737 KMP_SET_THREAD_STATE(previous_state);
7739 KMP_POP_PARTITIONED_TIMER();
7743 if (__itt_stack_caller_create_ptr) {
7745 if (team->t.t_stack_id != NULL) {
7746 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7748 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7749 __kmp_itt_stack_callee_leave(
7750 (__itt_caller)team->t.t_parent->t.t_stack_id);
7754 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7759 void __kmp_teams_master(int gtid) {
7761 kmp_info_t *thr = __kmp_threads[gtid];
7762 kmp_team_t *team = thr->th.th_team;
7763 ident_t *loc = team->t.t_ident;
7764 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7765 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7766 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7767 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7768               __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7771 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7774 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7775 tmp->cg_nthreads = 1;
7776 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7777                " cg_nthreads to 1\n",
                    thr, tmp));
7779 tmp->up = thr->th.th_cg_roots;
7780 thr->th.th_cg_roots = tmp;
7784#if INCLUDE_SSC_MARKS
7787 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7788 (microtask_t)thr->th.th_teams_microtask,
7789 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7790#if INCLUDE_SSC_MARKS
7794 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7795 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7798 __kmp_join_call(loc, gtid
7807 int __kmp_invoke_teams_master(int gtid) {
7808 kmp_info_t *this_thr = __kmp_threads[gtid];
7809 kmp_team_t *team = this_thr->th.th_team;
7811 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7812 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7813                  (void *)__kmp_teams_master);
7815 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7817 int tid = __kmp_tid_from_gtid(gtid);
7818 ompt_data_t *task_data =
7819 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7820 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7821 if (ompt_enabled.ompt_callback_implicit_task) {
7822 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7823 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7825 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7828 __kmp_teams_master(gtid);
7830 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7832 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
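// The __kmp_push_* helpers below stash clause values (num_threads, num_teams,
// thread_limit, proc_bind) on the encountering thread so the subsequent fork
// can pick them up.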
7841 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7842 kmp_info_t *thr = __kmp_threads[gtid];
7844 if (num_threads > 0)
7845 thr->th.th_set_nproc = num_threads;
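// Clamp and record the per-team thread limit for a teams construct, deriving a
// default from __kmp_teams_thread_limit or the available processors when no
// thread_limit clause was given.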
7848 static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams, int num_threads) {
7850 KMP_DEBUG_ASSERT(thr);
7852 if (!TCR_4(__kmp_init_middle))
7853 __kmp_middle_initialize();
7854 __kmp_assign_root_init_mask();
7855 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7856 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7858 if (num_threads == 0) {
7859 if (__kmp_teams_thread_limit > 0) {
7860 num_threads = __kmp_teams_thread_limit;
7862 num_threads = __kmp_avail_proc / num_teams;
7867 if (num_threads > __kmp_dflt_team_nth) {
7868 num_threads = __kmp_dflt_team_nth;
7870 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7871 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7873 if (num_teams * num_threads > __kmp_teams_max_nth) {
7874 num_threads = __kmp_teams_max_nth / num_teams;
7876 if (num_threads == 0) {
7880 if (num_threads < 0) {
7881 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7887 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7889 if (num_threads > __kmp_dflt_team_nth) {
7890 num_threads = __kmp_dflt_team_nth;
7892 if (num_teams * num_threads > __kmp_teams_max_nth) {
7893 int new_threads = __kmp_teams_max_nth / num_teams;
7894 if (new_threads == 0) {
7897 if (new_threads != num_threads) {
7898 if (!__kmp_reserve_warn) {
7899 __kmp_reserve_warn = 1;
7900 __kmp_msg(kmp_ms_warning,
7901 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7902 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7905 num_threads = new_threads;
7908 thr->th.th_teams_size.nth = num_threads;
7913 void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, int num_threads) {
7915 kmp_info_t *thr = __kmp_threads[gtid];
7916 if (num_teams < 0) {
7919 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7923 if (num_teams == 0) {
7924 if (__kmp_nteams > 0) {
7925 num_teams = __kmp_nteams;
7930 if (num_teams > __kmp_teams_max_nth) {
7931 if (!__kmp_reserve_warn) {
7932 __kmp_reserve_warn = 1;
7933 __kmp_msg(kmp_ms_warning,
7934 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7935 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7937 num_teams = __kmp_teams_max_nth;
7941 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7943 __kmp_push_thread_limit(thr, num_teams, num_threads);
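// OpenMP 5.1 variant: num_teams may be given as a lower/upper bound pair; pick
// a team count within [num_teams_lb, num_teams_ub] that respects
// __kmp_teams_max_nth and the requested threads per team.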
7948 void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7949                              int num_teams_ub, int num_threads) {
7950 kmp_info_t *thr = __kmp_threads[gtid];
7951 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7952 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7953 KMP_DEBUG_ASSERT(num_threads >= 0);
7955 if (num_teams_lb > num_teams_ub) {
7956 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7957 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7962 if (num_teams_lb == 0 && num_teams_ub > 0)
7963 num_teams_lb = num_teams_ub;
7965 if (num_teams_lb == 0 && num_teams_ub == 0) {
7966 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7967 if (num_teams > __kmp_teams_max_nth) {
7968 if (!__kmp_reserve_warn) {
7969 __kmp_reserve_warn = 1;
7970 __kmp_msg(kmp_ms_warning,
7971 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7972 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7974 num_teams = __kmp_teams_max_nth;
7976 } else if (num_teams_lb == num_teams_ub) {
7977 num_teams = num_teams_ub;
7979 if (num_threads <= 0) {
7980 if (num_teams_ub > __kmp_teams_max_nth) {
7981 num_teams = num_teams_lb;
7983 num_teams = num_teams_ub;
7986 num_teams = (num_threads > __kmp_teams_max_nth)
7988 : __kmp_teams_max_nth / num_threads;
7989 if (num_teams < num_teams_lb) {
7990 num_teams = num_teams_lb;
7991 } else if (num_teams > num_teams_ub) {
7992 num_teams = num_teams_ub;
7998 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
8000 __kmp_push_thread_limit(thr, num_teams, num_threads);
8004 void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
8005 kmp_info_t *thr = __kmp_threads[gtid];
8006 thr->th.th_set_proc_bind = proc_bind;
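// Fork/join plumbing: __kmp_internal_fork resets the team's construct counters
// and dispatch buffers and releases the workers through the fork barrier;
// __kmp_internal_join gathers them back at the join barrier.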
8011 void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
8012 kmp_info_t *this_thr = __kmp_threads[gtid];
8018 KMP_DEBUG_ASSERT(team);
8019 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8020 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8023 team->t.t_construct = 0;
8024 team->t.t_ordered.dt.t_value = 0;
8028 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
8029 if (team->t.t_max_nproc > 1) {
8031 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
8032 team->t.t_disp_buffer[i].buffer_index = i;
8033 team->t.t_disp_buffer[i].doacross_buf_idx = i;
8036 team->t.t_disp_buffer[0].buffer_index = 0;
8037 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
8041 KMP_ASSERT(this_thr->th.th_team == team);
8044 for (f = 0; f < team->t.t_nproc; f++) {
8045 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8046 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8051 __kmp_fork_barrier(gtid, 0);
8054 void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
8055 kmp_info_t *this_thr = __kmp_threads[gtid];
8057 KMP_DEBUG_ASSERT(team);
8058 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8059 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8065 if (__kmp_threads[gtid] &&
8066 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8067 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8068              __kmp_threads[gtid]);
8069 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8070              "team->t.t_nproc=%d\n",
8071              gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8073 __kmp_print_structure();
8075 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8076 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8079 __kmp_join_barrier(gtid);
8081 if (ompt_enabled.enabled &&
8082 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8083 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8084 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8085 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8087 void *codeptr = NULL;
8088 if (KMP_MASTER_TID(ds_tid) &&
8089 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8090 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8091 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8093 if (ompt_enabled.ompt_callback_sync_region_wait) {
8094 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8095 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8098 if (ompt_enabled.ompt_callback_sync_region) {
8099 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8100 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8104 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8105 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8106 ompt_scope_end, NULL, task_data, 0, ds_tid,
8107 ompt_task_implicit);
8113 KMP_ASSERT(this_thr->th.th_team == team);
8118#ifdef USE_LOAD_BALANCE
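// Load-balance mode (KMP_DYNAMIC_MODE=load_balance): estimate how many threads
// can run without oversubscribing by combining the system load with the pool
// and hot-team activity tracked below.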
8122static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8125 kmp_team_t *hot_team;
8127 if (root->r.r_active) {
8130 hot_team = root->r.r_hot_team;
8131 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8132 return hot_team->t.t_nproc - 1;
8137 for (i = 1; i < hot_team->t.t_nproc; i++) {
8138 if (hot_team->t.t_threads[i]->th.th_active) {
8147 static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8150 int hot_team_active;
8151 int team_curr_active;
8154 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                   set_nproc));
8156 KMP_DEBUG_ASSERT(root);
8157 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8158 ->th.th_current_task->td_icvs.dynamic == TRUE);
8159 KMP_DEBUG_ASSERT(set_nproc > 1);
8161 if (set_nproc == 1) {
8162 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8171 pool_active = __kmp_thread_pool_active_nth;
8172 hot_team_active = __kmp_active_hot_team_nproc(root);
8173 team_curr_active = pool_active + hot_team_active + 1;
8176 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8177 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8178               "hot team active = %d\n",
8179               system_active, pool_active, hot_team_active));
8181 if (system_active < 0) {
8185 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8186 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8189 retval = __kmp_avail_proc - __kmp_nth +
8190 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8191 if (retval > set_nproc) {
8194 if (retval < KMP_MIN_NTH) {
8195 retval = KMP_MIN_NTH;
8198 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                   retval));
8206 if (system_active < team_curr_active) {
8207 system_active = team_curr_active;
8209 retval = __kmp_avail_proc - system_active + team_curr_active;
8210 if (retval > set_nproc) {
8213 if (retval < KMP_MIN_NTH) {
8214 retval = KMP_MIN_NTH;
8217 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8226 void __kmp_cleanup(void) {
8229 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8231 if (TCR_4(__kmp_init_parallel)) {
8232#if KMP_HANDLE_SIGNALS
8233 __kmp_remove_signals();
8235 TCW_4(__kmp_init_parallel, FALSE);
8238 if (TCR_4(__kmp_init_middle)) {
8239#if KMP_AFFINITY_SUPPORTED
8240 __kmp_affinity_uninitialize();
8242 __kmp_cleanup_hierarchy();
8243 TCW_4(__kmp_init_middle, FALSE);
8246 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8248 if (__kmp_init_serial) {
8249 __kmp_runtime_destroy();
8250 __kmp_init_serial = FALSE;
8253 __kmp_cleanup_threadprivate_caches();
8255 for (f = 0; f < __kmp_threads_capacity; f++) {
8256 if (__kmp_root[f] != NULL) {
8257 __kmp_free(__kmp_root[f]);
8258 __kmp_root[f] = NULL;
8261 __kmp_free(__kmp_threads);
8264 __kmp_threads = NULL;
8266 __kmp_threads_capacity = 0;
8269 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8271 kmp_old_threads_list_t *next = ptr->next;
8272 __kmp_free(ptr->threads);
8277#if KMP_USE_DYNAMIC_LOCK
8278 __kmp_cleanup_indirect_user_locks();
8280 __kmp_cleanup_user_locks();
8284 __kmp_free(ompd_env_block);
8285 ompd_env_block = NULL;
8286 ompd_env_block_size = 0;
8290#if KMP_AFFINITY_SUPPORTED
8291 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8292 __kmp_cpuinfo_file = NULL;
8295#if KMP_USE_ADAPTIVE_LOCKS
8296#if KMP_DEBUG_ADAPTIVE_LOCKS
8297 __kmp_print_speculative_stats();
8300 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8301 __kmp_nested_nth.nth = NULL;
8302 __kmp_nested_nth.size = 0;
8303 __kmp_nested_nth.used = 0;
8304 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8305 __kmp_nested_proc_bind.bind_types = NULL;
8306 __kmp_nested_proc_bind.size = 0;
8307 __kmp_nested_proc_bind.used = 0;
8308 if (__kmp_affinity_format) {
8309 KMP_INTERNAL_FREE(__kmp_affinity_format);
8310 __kmp_affinity_format = NULL;
8313 __kmp_i18n_catclose();
8315#if KMP_USE_HIER_SCHED
8316 __kmp_hier_scheds.deallocate();
8319#if KMP_STATS_ENABLED
8323 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8328 int __kmp_ignore_mppbeg(void) {
8331   if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8332 if (__kmp_str_match_false(env))
8339 int __kmp_ignore_mppend(void) {
8342   if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8343 if (__kmp_str_match_false(env))
8350 void __kmp_internal_begin(void) {
8356 gtid = __kmp_entry_gtid();
8357 root = __kmp_threads[gtid]->th.th_root;
8358 KMP_ASSERT(KMP_UBER_GTID(gtid));
8360 if (root->r.r_begin)
8362 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8363 if (root->r.r_begin) {
8364 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8368 root->r.r_begin = TRUE;
8370 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8375 void __kmp_user_set_library(enum library_type arg) {
8382 gtid = __kmp_entry_gtid();
8383 thread = __kmp_threads[gtid];
8385 root = thread->th.th_root;
8387 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8389 if (root->r.r_in_parallel) {
8391 KMP_WARNING(SetLibraryIncorrectCall);
8396 case library_serial:
8397 thread->th.th_set_nproc = 0;
8398 set__nproc(thread, 1);
8400 case library_turnaround:
8401 thread->th.th_set_nproc = 0;
8402 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8403 : __kmp_dflt_team_nth_ub);
8405 case library_throughput:
8406 thread->th.th_set_nproc = 0;
8407 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8408 : __kmp_dflt_team_nth_ub);
8411 KMP_FATAL(UnknownLibraryType, arg);
8414 __kmp_aux_set_library(arg);
8417 void __kmp_aux_set_stacksize(size_t arg) {
8418 if (!__kmp_init_serial)
8419 __kmp_serial_initialize();
8422 if (arg & (0x1000 - 1)) {
8423 arg &= ~(0x1000 - 1);
8428 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8431 if (!TCR_4(__kmp_init_parallel)) {
8434 if (value < __kmp_sys_min_stksize)
8435 value = __kmp_sys_min_stksize;
8436 else if (value > KMP_MAX_STKSIZE)
8437 value = KMP_MAX_STKSIZE;
8439 __kmp_stksize = value;
8441 __kmp_env_stksize = TRUE;
8444 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8449 void __kmp_aux_set_library(enum library_type arg) {
8450 __kmp_library = arg;
8452 switch (__kmp_library) {
8453 case library_serial: {
8454 KMP_INFORM(LibraryIsSerial);
8456 case library_turnaround:
8457 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8458 __kmp_use_yield = 2;
8460 case library_throughput:
8461 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8462 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8465 KMP_FATAL(UnknownLibraryType, arg);
8471 static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8472 kmp_info_t *thr = __kmp_entry_thread();
8473 teams_serialized = 0;
8474 if (thr->th.th_teams_microtask) {
8475 kmp_team_t *team = thr->th.th_team;
8476 int tlevel = thr->th.th_teams_level;
8477 int ii = team->t.t_level;
8478 teams_serialized = team->t.t_serialized;
8479 int level = tlevel + 1;
8480 KMP_DEBUG_ASSERT(ii >= tlevel);
8481 while (ii > level) {
8482 for (teams_serialized = team->t.t_serialized;
8483 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8485 if (team->t.t_serialized && (!teams_serialized)) {
8486 team = team->t.t_parent;
8490 team = team->t.t_parent;
8499int __kmp_aux_get_team_num() {
8501 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8503 if (serialized > 1) {
8506 return team->t.t_master_tid;
8512int __kmp_aux_get_num_teams() {
8514 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8516 if (serialized > 1) {
8519 return team->t.t_parent->t.t_nproc;
8558 typedef struct kmp_affinity_format_field_t {
       char short_name;
8560   const char *long_name;
       char field_format;
8563 } kmp_affinity_format_field_t;
8565static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8566#if KMP_AFFINITY_SUPPORTED
8567 {
'A',
"thread_affinity",
's'},
8569 {
't',
"team_num",
'd'},
8570 {
'T',
"num_teams",
'd'},
8571 {
'L',
"nesting_level",
'd'},
8572 {
'n',
"thread_num",
'd'},
8573 {
'N',
"num_threads",
'd'},
8574 {
'a',
"ancestor_tnum",
'd'},
8576 {
'P',
"process_id",
'd'},
8577 {
'i',
"native_thread_id",
'd'}};
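// Parse one "%..." field of an affinity format string: handle flags, width,
// and the short or {long} specifier, look it up in the table above, format the
// corresponding value into field_buffer, and advance *ptr past the specifier.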
8580 static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                                const char **ptr,
8582                                             kmp_str_buf_t *field_buffer) {
8583 int rc, format_index, field_value;
8584 const char *width_left, *width_right;
8585 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8586 static const int FORMAT_SIZE = 20;
8587 char format[FORMAT_SIZE] = {0};
8588 char absolute_short_name = 0;
8590 KMP_DEBUG_ASSERT(gtid >= 0);
8591 KMP_DEBUG_ASSERT(th);
8592 KMP_DEBUG_ASSERT(**ptr == '%');
8593 KMP_DEBUG_ASSERT(field_buffer);
8595 __kmp_str_buf_clear(field_buffer);
8602 __kmp_str_buf_cat(field_buffer, "%", 1);
8613 right_justify = false;
8615 right_justify = true;
8619 width_left = width_right = NULL;
8620 if (**ptr >= '0' && **ptr <= '9') {
8628 format[format_index++] = '%';
8630 format[format_index++] = '-';
8632 format[format_index++] = '0';
8633 if (width_left && width_right) {
8637 while (i < 8 && width_left < width_right) {
8638 format[format_index++] = *width_left;
8646 found_valid_name = false;
8647 parse_long_name = (**ptr == '{');
8648 if (parse_long_name)
8650 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8651                            sizeof(__kmp_affinity_format_table[0]);
8653 char short_name = __kmp_affinity_format_table[i].short_name;
8654 const char *long_name = __kmp_affinity_format_table[i].long_name;
8655 char field_format = __kmp_affinity_format_table[i].field_format;
8656 if (parse_long_name) {
8657 size_t length = KMP_STRLEN(long_name);
8658 if (strncmp(*ptr, long_name, length) == 0) {
8659 found_valid_name = true;
8662 } else if (**ptr == short_name) {
8663   found_valid_name = true;
8666 if (found_valid_name) {
8667 format[format_index++] = field_format;
8668 format[format_index++] = '\0';
8669 absolute_short_name = short_name;
8673 if (parse_long_name) {
8675 absolute_short_name = 0;
8683 switch (absolute_short_name) {
8685 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8688 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8691 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8694 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8697 static const int BUFFER_SIZE = 256;
8698 char buf[BUFFER_SIZE];
8699 __kmp_expand_host_name(buf, BUFFER_SIZE);
8700 rc = __kmp_str_buf_print(field_buffer, format, buf);
8703 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8706 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8709 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8713 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8714 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8716#if KMP_AFFINITY_SUPPORTED
8719 __kmp_str_buf_init(&buf);
8720 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8721 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8722 __kmp_str_buf_free(&buf);
8728 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8730 if (parse_long_name) {
8739 KMP_ASSERT(format_index <= FORMAT_SIZE);
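// Expand a full affinity format string for thread gtid into buffer, falling
// back to the global __kmp_affinity_format when format is NULL or empty.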
8749 size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8750                                   kmp_str_buf_t *buffer) {
8751 const char *parse_ptr;
8753 const kmp_info_t *th;
8754 kmp_str_buf_t field;
8756 KMP_DEBUG_ASSERT(buffer);
8757 KMP_DEBUG_ASSERT(gtid >= 0);
8759 __kmp_str_buf_init(&field);
8760 __kmp_str_buf_clear(buffer);
8762 th = __kmp_threads[gtid];
8768 if (parse_ptr == NULL || *parse_ptr == '\0') {
8769   parse_ptr = __kmp_affinity_format;
8771 KMP_DEBUG_ASSERT(parse_ptr);
8773 while (*parse_ptr != '\0') {
8775   if (*parse_ptr == '%') {
8777 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8778 __kmp_str_buf_catbuf(buffer, &field);
8782 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8787 __kmp_str_buf_free(&field);
8792 void __kmp_aux_display_affinity(int gtid, const char *format) {
       kmp_str_buf_t buf;
8794 __kmp_str_buf_init(&buf);
8795 __kmp_aux_capture_affinity(gtid, format, &buf);
8796 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8797 __kmp_str_buf_free(&buf);
8801 void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8802 int blocktime = arg;
8808 __kmp_save_internal_controls(thread);
8811 if (blocktime < KMP_MIN_BLOCKTIME)
8812 blocktime = KMP_MIN_BLOCKTIME;
8813 else if (blocktime > KMP_MAX_BLOCKTIME)
8814 blocktime = KMP_MAX_BLOCKTIME;
8816 set__blocktime_team(thread->th.th_team, tid, blocktime);
8817 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8821 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8823 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8824 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8830 set__bt_set_team(thread->th.th_team, tid, bt_set);
8831 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8833 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8834               "bt_intervals=%d, monitor_updates=%d\n",
8835               __kmp_gtid_from_tid(tid, thread->th.th_team),
8836               thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8837               __kmp_monitor_wakeups));
8839 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8840               __kmp_gtid_from_tid(tid, thread->th.th_team),
8841               thread->th.th_team->t.t_id, tid, blocktime));
8845 void __kmp_aux_set_defaults(char const *str, size_t len) {
8846 if (!__kmp_init_serial) {
8847 __kmp_serial_initialize();
8849 __kmp_env_initialize(str);
8851 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
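// Pick the reduction implementation for a reduce clause. The packed result
// encodes a critical section, atomics, or a tree reduction over a barrier,
// chosen from the flags the compiler set in 'loc', the team size, and the
// per-architecture/OS heuristics below; __kmp_force_reduction_method can
// override the choice.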
8859 PACKED_REDUCTION_METHOD_T
8860 __kmp_determine_reduction_method(
8861     ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8862     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8863     kmp_critical_name *lck) {
8874 PACKED_REDUCTION_METHOD_T retval;
8878 KMP_DEBUG_ASSERT(lck);
8880 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
       (loc && \
8882    ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8883#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8885 retval = critical_reduce_block;
8888 team_size = __kmp_get_team_num_threads(global_tid);
8889 if (team_size == 1) {
8891 retval = empty_reduce_block;
8895 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8897#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8898 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8899 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8901#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8902 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \
8903 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8905 int teamsize_cutoff = 4;
8907#if KMP_MIC_SUPPORTED
8908 if (__kmp_mic_type != non_mic) {
8909 teamsize_cutoff = 8;
8912 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8913 if (tree_available) {
8914 if (team_size <= teamsize_cutoff) {
8915 if (atomic_available) {
8916 retval = atomic_reduce_block;
8919 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8921 } else if (atomic_available) {
8922 retval = atomic_reduce_block;
8925#error "Unknown or unsupported OS"
8930#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8931 KMP_ARCH_WASM || KMP_ARCH_PPC
8933#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8934 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \
8935 KMP_OS_WASI || KMP_OS_AIX
8939 if (atomic_available) {
8940 if (num_vars <= 2) {
8941 retval = atomic_reduce_block;
8947 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8948 if (atomic_available && (num_vars <= 3)) {
8949 retval = atomic_reduce_block;
8950 } else if (tree_available) {
8951 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8952     (reduce_size < (2000 * sizeof(kmp_real64)))) {
8953 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8958#error "Unknown or unsupported OS"
8962#error "Unknown or unsupported architecture"
8970 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8973 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8975 int atomic_available, tree_available;
8977 switch ((forced_retval = __kmp_force_reduction_method)) {
8978 case critical_reduce_block:
8982 case atomic_reduce_block:
8983 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8984 if (!atomic_available) {
8985 KMP_WARNING(RedMethodNotSupported, "atomic");
8986 forced_retval = critical_reduce_block;
8990 case tree_reduce_block:
8991 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8992 if (!tree_available) {
8993 KMP_WARNING(RedMethodNotSupported, "tree");
8994 forced_retval = critical_reduce_block;
8996#if KMP_FAST_REDUCTION_BARRIER
8997 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
9006 retval = forced_retval;
9009 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
9011#undef FAST_REDUCTION_TREE_METHOD_GENERATED
9012#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
9017 kmp_int32 __kmp_get_reduce_method(void) {
9018 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
9023void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
9027void __kmp_hard_pause() {
9028 __kmp_pause_status = kmp_hard_paused;
9029 __kmp_internal_end_thread(-1);
9033void __kmp_resume_if_soft_paused() {
9034 if (__kmp_pause_status == kmp_soft_paused) {
9035 __kmp_pause_status = kmp_not_paused;
9037 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
9038 kmp_info_t *thread = __kmp_threads[gtid];
9040 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
9042 if (fl.is_sleeping())
9044 else if (__kmp_try_suspend_mx(thread)) {
9045 __kmp_unlock_suspend_mx(thread);
9048 if (fl.is_sleeping()) {
9051 } else if (__kmp_try_suspend_mx(thread)) {
9052 __kmp_unlock_suspend_mx(thread);
9064int __kmp_pause_resource(kmp_pause_status_t level) {
9065 if (level == kmp_not_paused) {
9066 if (__kmp_pause_status == kmp_not_paused) {
9070 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9071 __kmp_pause_status == kmp_hard_paused);
9072 __kmp_pause_status = kmp_not_paused;
9075 } else if (level == kmp_soft_paused) {
9076 if (__kmp_pause_status != kmp_not_paused) {
9083 } else if (level == kmp_hard_paused) {
9084 if (__kmp_pause_status != kmp_not_paused) {
9097 void __kmp_omp_display_env(int verbose) {
9098 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9099 if (__kmp_init_serial == 0)
9100 __kmp_do_serial_initialize();
9101 __kmp_display_env_impl(!verbose, verbose);
9102 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
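// Distributed-barrier maintenance when the hot team changes size:
// __kmp_resize_dist_barrier releases workers still parked on the old barrier
// shape and rebuilds it for the new thread count; __kmp_add_threads_to_team
// wakes re-added workers and waits until they mark themselves as in the team.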
9106 void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, int new_nthreads) {
9108 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9110 kmp_info_t **other_threads = team->t.t_threads;
9114 for (int f = 1; f < old_nthreads; ++f) {
9115 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9117 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9123 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9124 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9128 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9130 team->t.t_threads[f]->th.th_used_in_team.store(2);
9131 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9134 team->t.b->go_release();
9140 int count = old_nthreads - 1;
9142 count = old_nthreads - 1;
9143 for (int f = 1; f < old_nthreads; ++f) {
9144 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9145 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9146 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9147 void *, other_threads[f]->th.th_sleep_loc);
9148 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9151 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9157 team->t.b->update_num_threads(new_nthreads);
9158 team->t.b->go_reset();
9161 void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9163 KMP_DEBUG_ASSERT(team);
9169 for (int f = 1; f < new_nthreads; ++f) {
9170 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9171 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9173 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9174 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9175 (kmp_flag_32<false, false> *)NULL);
9181 int count = new_nthreads - 1;
9183 count = new_nthreads - 1;
9184 for (int f = 1; f < new_nthreads; ++f) {
9185 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9193kmp_info_t **__kmp_hidden_helper_threads;
9194kmp_info_t *__kmp_hidden_helper_main_thread;
9195std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9197kmp_int32 __kmp_hidden_helper_threads_num = 8;
9198kmp_int32 __kmp_enable_hidden_helper = TRUE;
9200kmp_int32 __kmp_hidden_helper_threads_num = 0;
9201kmp_int32 __kmp_enable_hidden_helper = FALSE;
9205std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
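// Body run by every hidden helper thread: all helpers rendezvous on
// __kmp_hit_hidden_helper_threads_num, then the main helper thread resets the
// init flag, releases the thread that triggered initialization, waits for
// incoming work, and signals the remaining helper workers.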
9207 void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9212 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9213 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9214 __kmp_hidden_helper_threads_num)
9220 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9221 __kmp_hidden_helper_initz_release();
9222 __kmp_hidden_helper_main_thread_wait();
9224 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9225 __kmp_hidden_helper_worker_thread_signal();
9231void __kmp_hidden_helper_threads_initz_routine() {
9233 const int gtid = __kmp_register_root(TRUE);
9234 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9235 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9236 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9237 __kmp_hidden_helper_threads_num;
9239 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9244 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9246 __kmp_hidden_helper_threads_deinitz_release();
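// Nesting-mode support (KMP_NESTING_MODE): size the per-level nth table to the
// machine topology so nested parallel regions default to one level per
// hardware layer (socket, core, hardware thread).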
9266void __kmp_init_nesting_mode() {
9267 int levels = KMP_HW_LAST;
9268 __kmp_nesting_mode_nlevels = levels;
9269 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9270 for (int i = 0; i < levels; ++i)
9271   __kmp_nesting_nth_level[i] = 0;
9272 if (__kmp_nested_nth.size < levels) {
9273 __kmp_nested_nth.nth =
9274     (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9275 __kmp_nested_nth.size = levels;
9280void __kmp_set_nesting_mode_threads() {
9281 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9283 if (__kmp_nesting_mode == 1)
9284 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9285 else if (__kmp_nesting_mode > 1)
9286 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9288 if (__kmp_topology) {
9290 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9291 loc < __kmp_nesting_mode_nlevels;
9292 loc++, hw_level++) {
9293 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9294 if (__kmp_nesting_nth_level[loc] == 1)
9298 if (__kmp_nesting_mode > 1 && loc > 1) {
9299 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9300 int num_cores = __kmp_topology->get_count(core_level);
9301 int upper_levels = 1;
9302 for (int level = 0; level < loc - 1; ++level)
9303 upper_levels *= __kmp_nesting_nth_level[level];
9304 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9305 __kmp_nesting_nth_level[loc - 1] =
9306 num_cores / __kmp_nesting_nth_level[loc - 2];
9308 __kmp_nesting_mode_nlevels = loc;
9309 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9311 if (__kmp_avail_proc >= 4) {
9312 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9313 __kmp_nesting_nth_level[1] = 2;
9314 __kmp_nesting_mode_nlevels = 2;
9316 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9317 __kmp_nesting_mode_nlevels = 1;
9319 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9321 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9322 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9324 set__nproc(thread, __kmp_nesting_nth_level[0]);
9325 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9326 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9327 if (get__max_active_levels(thread) > 1) {
9329 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9331 if (__kmp_nesting_mode == 1)
9332 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9337#if !KMP_STATS_ENABLED
9338void __kmp_reset_stats() {}
9341int __kmp_omp_debug_struct_info = FALSE;
9342int __kmp_debugging = FALSE;
9344#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9345void __kmp_itt_fini_ittlib() {}
9346void __kmp_itt_init_ittlib() {}