#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"
#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#define KMP_USE_PRCTL 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);
static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);
#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
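// Determine the global thread id of the calling thread. Depending on
// __kmp_gtid_mode this uses thread-local data (TDATA), keyed TLS, or the
// internal stack-address search over __kmp_threads implemented below.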
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;
  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();
  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
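// Check that the calling thread's stack does not overlap the stack of any
// other registered thread; issues a StackOverlap fatal error if it does.
// The extensive check only runs when __kmp_env_checks is enabled and the
// thread is not an uber thread.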
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking uber threads since they use refinement and cannot
     overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;
  while (!done) {
    KMP_YIELD(TRUE);
  }
}
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
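// Emit a formatted "OMP warning:" message on kmp_err unless
// __kmp_generate_warnings is kmp_warnings_off.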
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
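// Terminate the process after dumping the debug buffer (if enabled). On
// Windows the abort flag is published to other threads first; elsewhere the
// library is unregistered and abort() is called.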
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

#if KMP_OS_WINDOWS
  // Let other threads know of abnormal termination.
  __kmp_global.g.g_abort = SIGABRT;
#else
  __kmp_unregister_library();
  abort();
#endif

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
} // __kmp_abort_thread
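// Storage-map dumps: print the address ranges of the per-thread and per-team
// data structures (controlled by __kmp_storage_map).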
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    // lpReserved == NULL when the DLL is unloaded via FreeLibrary();
    // only then is a full library shutdown safe.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
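/* __kmp_parallel_deo / __kmp_parallel_dxo implement the handshake for the
   "ordered" construct: deo waits until it is this thread's turn, dxo passes
   the turn to the next thread in the team. */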
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */
    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
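/* __kmp_enter_single: returns TRUE for the thread that wins the race for the
   SINGLE construct (by atomically bumping team->t.t_construct), FALSE for the
   others; __kmp_exit_single undoes the consistency-check bookkeeping. */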
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
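/* __kmp_reserve_threads: decide how many threads the upcoming parallel region
   may actually use, applying dyn-var adjustment (load balance, thread limit,
   random), KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT and the capacity of the
   __kmp_threads array. Returns 1 if the region must be serialized. */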
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
763 if (!get__dynamic_2(parent_team, master_tid)) {
766#ifdef USE_LOAD_BALANCE
767 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
768 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
769 if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
785 if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
799 if (set_nthreads > 2) {
800 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
801 new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }
  // Respect KMP_DEVICE_THREAD_LIMIT
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }
829 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
830 __kmp_reserve_warn = 1;
831 __kmp_msg(kmp_ms_warning,
832 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
833 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }
848 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
849 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }
860 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
861 __kmp_reserve_warn = 1;
862 __kmp_msg(kmp_ms_warning,
863 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
864 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }
881 capacity = __kmp_threads_capacity;
882 if (TCR_PTR(__kmp_threads[0]) == NULL) {
888 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
889 capacity -= __kmp_hidden_helper_threads_num;
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
899 if (slotsAdded < slotsRequired) {
901 new_nthreads -= (slotsRequired - slotsAdded);
902 KMP_ASSERT(new_nthreads >= 1);
905 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
906 __kmp_reserve_warn = 1;
907 if (__kmp_tp_cached) {
908 __kmp_msg(kmp_ms_warning,
909 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
910 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
911 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
913 __kmp_msg(kmp_ms_warning,
914 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
915 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
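/* __kmp_fork_team_threads: install the primary thread as member 0 of the new
   team, allocate or reuse worker threads, propagate barrier and teams state to
   them, and (if affinity is enabled) partition places for the team. */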
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
950 master_th->th.th_info.ds.ds_tid = 0;
951 master_th->th.th_team = team;
952 master_th->th.th_team_nproc = team->t.t_nproc;
953 master_th->th.th_team_master = master_th;
954 master_th->th.th_team_serialized = FALSE;
955 master_th->th.th_dispatch = &team->t.t_dispatch[0];
958#if KMP_NESTED_HOT_TEAMS
960 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
963 int level = team->t.t_active_level - 1;
964 if (master_th->th.th_teams_microtask) {
965 if (master_th->th.th_teams_size.nteams > 1) {
969 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
970 master_th->th.th_teams_level == team->t.t_level) {
975 if (level < __kmp_hot_teams_max_level) {
976 if (hot_teams[level].hot_team) {
978 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
982 hot_teams[level].hot_team = team;
983 hot_teams[level].hot_team_nth = team->t.t_nproc;
990 use_hot_team = team == root->r.r_hot_team;
995 team->t.t_threads[0] = master_th;
996 __kmp_initialize_info(master_th, team, 0, master_gtid);
999 for (i = 1; i < team->t.t_nproc; i++) {
1002 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
1003 team->t.t_threads[i] = thr;
1004 KMP_DEBUG_ASSERT(thr);
1005 KMP_DEBUG_ASSERT(thr->th.th_team == team);
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
1013 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1014 thr->th.th_teams_level = master_th->th.th_teams_level;
1015 thr->th.th_teams_size = master_th->th.th_teams_size;
1018 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1019 for (b = 0; b < bs_last_barrier; ++b) {
1020 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1021 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1023 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1029#if KMP_AFFINITY_SUPPORTED
1033 if (!fork_teams_workers) {
1034 __kmp_partition_places(team);
1038 if (team->t.t_nproc > 1 &&
1039 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1040 team->t.b->update_num_threads(team->t.t_nproc);
1041 __kmp_add_threads_to_team(team, team->t.t_nproc);
1046 if (__kmp_tasking_mode != tskm_immediate_exec) {
1048 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
    KA_TRACE(
        20,
        ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
         "%p, new task_team %p / team %p\n",
         __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
         team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
         team));
1058 KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1059 master_th->th.th_task_state);
1063 if (team->t.t_nproc > 1) {
1064 KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
1065 team->t.t_threads[1]->th.th_task_state == 1);
1066 KMP_CHECK_UPDATE(master_th->th.th_task_state,
1067 team->t.t_threads[1]->th.th_task_state);
1069 master_th->th.th_task_state = 0;
1073 KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1074 master_th->th.th_task_state);
1076 master_th->th.th_task_state = 0;
1080 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1081 for (i = 0; i < team->t.t_nproc; i++) {
1082 kmp_info_t *thr = team->t.t_threads[i];
1083 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1084 thr->th.th_prev_level != team->t.t_level) {
1085 team->t.t_display_affinity = 1;
1094#if KMP_ARCH_X86 || KMP_ARCH_X86_64
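// Propagate the primary thread's x87 FPU control word and MXCSR to the team
// (propagateFPControl) and restore them on the workers when they start running
// (updateHWFPControl), honoring __kmp_inherit_fp_control.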
1098inline static void propagateFPControl(kmp_team_t *team) {
1099 if (__kmp_inherit_fp_control) {
1100 kmp_int16 x87_fpu_control_word;
1104 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1105 __kmp_store_mxcsr(&mxcsr);
1106 mxcsr &= KMP_X86_MXCSR_MASK;
1117 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1118 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1121 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1125 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1131inline static void updateHWFPControl(kmp_team_t *team) {
1132 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1135 kmp_int16 x87_fpu_control_word;
1137 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1138 __kmp_store_mxcsr(&mxcsr);
1139 mxcsr &= KMP_X86_MXCSR_MASK;
1141 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1142 __kmp_clear_x87_fpu_status_word();
1143 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1146 if (team->t.t_mxcsr != mxcsr) {
1147 __kmp_load_mxcsr(&team->t.t_mxcsr);
1152#define propagateFPControl(x) ((void)0)
1153#define updateHWFPControl(x) ((void)0)
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
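/* __kmp_serialized_parallel: enter a serialized parallel region for the
   calling thread - set up (or reuse) its serial team, push ICVs and a dispatch
   buffer, and report the region to OMPT if enabled. */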
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1172 if (!TCR_4(__kmp_init_parallel))
1173 __kmp_parallel_initialize();
1174 __kmp_resume_if_soft_paused();
1176 this_thr = __kmp_threads[global_tid];
1177 serial_team = this_thr->th.th_serial_team;
1180 KMP_DEBUG_ASSERT(serial_team);
  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;
1195 this_thr->th.th_set_nproc = 0;
1198 ompt_data_t ompt_parallel_data = ompt_data_none;
1199 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1200 if (ompt_enabled.enabled &&
1201 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1203 ompt_task_info_t *parent_task_info;
1204 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1206 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1207 if (ompt_enabled.ompt_callback_parallel_begin) {
1210 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1211 &(parent_task_info->task_data), &(parent_task_info->frame),
1212 &ompt_parallel_data, team_size,
1213 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1218 if (this_thr->th.th_team != serial_team) {
1220 int level = this_thr->th.th_team->t.t_level;
1222 if (serial_team->t.t_serialized) {
1225 kmp_team_t *new_team;
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);
1240 new_team->t.t_threads[0] = this_thr;
1241 new_team->t.t_parent = this_thr->th.th_team;
1242 serial_team = new_team;
1243 this_thr->th.th_serial_team = serial_team;
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }
1261 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1262 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1263 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1264 serial_team->t.t_ident = loc;
1265 serial_team->t.t_serialized = 1;
1266 serial_team->t.t_nproc = 1;
1267 serial_team->t.t_parent = this_thr->th.th_team;
1269 serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
1270 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1271 this_thr->th.th_team = serial_team;
1272 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
1276 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1277 this_thr->th.th_current_task->td_flags.executing = 0;
1279 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1284 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1285 &this_thr->th.th_current_task->td_parent->td_icvs);
1289 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1290 this_thr->th.th_current_task->td_icvs.nproc =
1291 __kmp_nested_nth.nth[level + 1];
1294 if (__kmp_nested_proc_bind.used &&
1295 (level + 1 < __kmp_nested_proc_bind.used)) {
1296 this_thr->th.th_current_task->td_icvs.proc_bind =
1297 __kmp_nested_proc_bind.bind_types[level + 1];
1301 serial_team->t.t_pkfn = (microtask_t)(~0);
1303 this_thr->th.th_info.ds.ds_tid = 0;
1306 this_thr->th.th_team_nproc = 1;
1307 this_thr->th.th_team_master = this_thr;
1308 this_thr->th.th_team_serialized = 1;
1309 this_thr->th.th_task_team = NULL;
1310 this_thr->th.th_task_state = 0;
1312 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1313 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1314 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;
1316 propagateFPControl(serial_team);
1319 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1320 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1321 serial_team->t.t_dispatch->th_disp_buffer =
1322 (dispatch_private_info_t *)__kmp_allocate(
1323 sizeof(dispatch_private_info_t));
1325 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1332 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1333 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1334 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1335 ++serial_team->t.t_serialized;
1336 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1339 int level = this_thr->th.th_team->t.t_level;
1342 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1343 this_thr->th.th_current_task->td_icvs.nproc =
1344 __kmp_nested_nth.nth[level + 1];
1346 serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));
1352 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1354 dispatch_private_info_t *disp_buffer =
1355 (dispatch_private_info_t *)__kmp_allocate(
1356 sizeof(dispatch_private_info_t));
1357 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1358 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1360 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1363 __kmp_push_task_team_node(this_thr, serial_team);
1367 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1371 if (__kmp_display_affinity) {
1372 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1373 this_thr->th.th_prev_num_threads != 1) {
1375 __kmp_aux_display_affinity(global_tid, NULL);
1376 this_thr->th.th_prev_level = serial_team->t.t_level;
1377 this_thr->th.th_prev_num_threads = 1;
1381 if (__kmp_env_consistency_check)
1382 __kmp_push_parallel(global_tid, NULL);
1384 serial_team->t.ompt_team_info.master_return_address = codeptr;
1385 if (ompt_enabled.enabled &&
1386 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1387 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1388 OMPT_GET_FRAME_ADDRESS(0);
1390 ompt_lw_taskteam_t lw_taskteam;
1391 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1392 &ompt_parallel_data, codeptr);
1394 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1398 if (ompt_enabled.ompt_callback_implicit_task) {
1399 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1400 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1401 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1402 ompt_task_implicit);
1403 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1404 __kmp_tid_from_gtid(global_tid);
1408 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1409 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1410 OMPT_GET_FRAME_ADDRESS(0);
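/* Helper predicates for __kmp_fork_call: detect a fork that is nested inside a
   teams construct and a fork that is itself entering a teams construct. */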
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
static int __kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                               kmp_int32 argc, kmp_info_t *master_th,
                               kmp_root_t *root,
                               enum fork_context_e call_context,
                               microtask_t microtask, launch_t invoker,
                               int master_set_numthreads, int level,
#if OMPT_SUPPORT
                               ompt_data_t ompt_parallel_data,
                               void *return_address,
#endif
                               kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
1454 if (parent_team == master_th->th.th_serial_team) {
1457 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1459 if (call_context == fork_context_gnu) {
1462 parent_team->t.t_serialized--;
1467 parent_team->t.t_pkfn = microtask;
1472 void **exit_frame_p;
1473 ompt_data_t *implicit_task_data;
1474 ompt_lw_taskteam_t lw_taskteam;
1476 if (ompt_enabled.enabled) {
1477 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1478 &ompt_parallel_data, return_address);
1479 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1481 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1485 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1486 if (ompt_enabled.ompt_callback_implicit_task) {
1487 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1488 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1489 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1490 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1494 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1496 exit_frame_p = &dummy;
1502 parent_team->t.t_serialized--;
1505 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1506 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1507 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1516 if (ompt_enabled.enabled) {
1517 *exit_frame_p = NULL;
1518 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1519 if (ompt_enabled.ompt_callback_implicit_task) {
1520 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1521 ompt_scope_end, NULL, implicit_task_data, 1,
1522 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1524 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1525 __ompt_lw_taskteam_unlink(master_th);
1526 if (ompt_enabled.ompt_callback_parallel_end) {
1527 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1528 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1529 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1531 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1537 parent_team->t.t_pkfn = microtask;
1538 parent_team->t.t_invoke = invoker;
1539 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1540 parent_team->t.t_active_level++;
1541 parent_team->t.t_level++;
1542 parent_team->t.t_def_allocator = master_th->th.th_def_allocator;
1549 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1552 if (ompt_enabled.enabled) {
1553 ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1561 if (master_set_numthreads) {
1562 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1564 kmp_info_t **other_threads = parent_team->t.t_threads;
1567 int old_proc = master_th->th.th_teams_size.nth;
1568 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1569 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1570 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1572 parent_team->t.t_nproc = master_set_numthreads;
1573 for (i = 0; i < master_set_numthreads; ++i) {
1574 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1578 master_th->th.th_set_nproc = 0;
1582 if (__kmp_debugging) {
1583 int nth = __kmp_omp_num_threads(loc);
1585 master_set_numthreads = nth;
1591 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1593 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1594 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1595 proc_bind = proc_bind_false;
1598 if (proc_bind == proc_bind_default) {
1599 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1605 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1606 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1607 master_th->th.th_current_task->td_icvs.proc_bind)) {
1608 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1611 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1613 if (proc_bind_icv != proc_bind_default &&
1614 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1615 kmp_info_t **other_threads = parent_team->t.t_threads;
1616 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1617 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1621 master_th->th.th_set_proc_bind = proc_bind_default;
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
1633 if (__itt_stack_caller_create_ptr) {
1634 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1636 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1639#if KMP_AFFINITY_SUPPORTED
1640 __kmp_partition_places(parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
static int __kmp_serial_fork_call(ident_t *loc, int gtid,
                                  enum fork_context_e call_context,
                                  kmp_int32 argc, microtask_t microtask,
                                  launch_t invoker, kmp_info_t *master_th,
                                  kmp_team_t *parent_team,
#if OMPT_SUPPORT
                                  ompt_data_t *ompt_parallel_data,
                                  void **return_address,
                                  ompt_data_t **parent_task_data,
#endif
                                  kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  SimpleVLA<void *> args(argc);
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1699 master_th->th.th_serial_team->t.t_pkfn = microtask;
1702 if (call_context == fork_context_intel) {
1704 master_th->th.th_serial_team->t.t_ident = loc;
1707 master_th->th.th_serial_team->t.t_level--;
1712 void **exit_frame_p;
1713 ompt_task_info_t *task_info;
1714 ompt_lw_taskteam_t lw_taskteam;
1716 if (ompt_enabled.enabled) {
1717 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1718 ompt_parallel_data, *return_address);
1720 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1722 task_info = OMPT_CUR_TASK_INFO(master_th);
1723 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1724 if (ompt_enabled.ompt_callback_implicit_task) {
1725 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1726 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1727 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1728 &(task_info->task_data), 1,
1729 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1733 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1735 exit_frame_p = &dummy;
1740 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1741 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1742 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1751 if (ompt_enabled.enabled) {
1752 *exit_frame_p = NULL;
1753 if (ompt_enabled.ompt_callback_implicit_task) {
1754 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1755 ompt_scope_end, NULL, &(task_info->task_data), 1,
1756 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1758 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1759 __ompt_lw_taskteam_unlink(master_th);
1760 if (ompt_enabled.ompt_callback_parallel_end) {
1761 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1762 ompt_parallel_data, *parent_task_data,
1763 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1765 master_th->th.ompt_thread_info.state = ompt_state_overhead;
  } else if (microtask == (microtask_t)__kmp_teams_master) {
    KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
    team = master_th->th.th_team;
    team->t.t_invoke = invoker;
    __kmp_alloc_argv_entries(argc, team, TRUE);
    team->t.t_argc = argc;
    argv = (void **)team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);
1784 if (ompt_enabled.enabled) {
1785 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1786 if (ompt_enabled.ompt_callback_implicit_task) {
1787 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1788 ompt_scope_end, NULL, &(task_info->task_data), 0,
1789 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1791 if (ompt_enabled.ompt_callback_parallel_end) {
1792 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1793 ompt_parallel_data, *parent_task_data,
1794 OMPT_INVOKER(call_context) | ompt_parallel_league,
1797 master_th->th.ompt_thread_info.state = ompt_state_overhead;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);
1808 void **exit_frame_p;
1809 ompt_task_info_t *task_info;
1810 ompt_lw_taskteam_t lw_taskteam;
1811 ompt_data_t *implicit_task_data;
1813 if (ompt_enabled.enabled) {
1814 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1815 ompt_parallel_data, *return_address);
1816 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1818 task_info = OMPT_CUR_TASK_INFO(master_th);
1819 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1822 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1823 if (ompt_enabled.ompt_callback_implicit_task) {
1824 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1825 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1826 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1827 ompt_task_implicit);
1828 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1832 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1834 exit_frame_p = &dummy;
1839 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1840 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1841 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1850 if (ompt_enabled.enabled) {
1851 *exit_frame_p = NULL;
1852 if (ompt_enabled.ompt_callback_implicit_task) {
1853 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1854 ompt_scope_end, NULL, &(task_info->task_data), 1,
1855 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1858 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1859 __ompt_lw_taskteam_unlink(master_th);
1860 if (ompt_enabled.ompt_callback_parallel_end) {
1861 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1862 ompt_parallel_data, *parent_task_data,
1863 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1865 master_th->th.ompt_thread_info.state = ompt_state_overhead;
  } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
#endif

    /* we were called from GNU native code */
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }
  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  return FALSE;
}
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int task_thread_limit = 0;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with */
    /* some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }
1936 if (!TCR_4(__kmp_init_parallel))
1937 __kmp_parallel_initialize();
1938 __kmp_resume_if_soft_paused();
1943 master_th = __kmp_threads[gtid];
1945 parent_team = master_th->th.th_team;
1946 master_tid = master_th->th.th_info.ds.ds_tid;
1947 master_this_cons = master_th->th.th_local.this_construct;
1948 root = master_th->th.th_root;
1949 master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;
  task_thread_limit =
      master_th->th.th_current_task->td_icvs.task_thread_limit;
1955 ompt_data_t ompt_parallel_data = ompt_data_none;
1956 ompt_data_t *parent_task_data;
1957 ompt_frame_t *ompt_frame;
1958 void *return_address = NULL;
1960 if (ompt_enabled.enabled) {
1961 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1963 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1968 __kmp_assign_root_init_mask();
1971 level = parent_team->t.t_level;
1973 active_level = parent_team->t.t_active_level;
1975 teams_level = master_th->th.th_teams_level;
1976#if KMP_NESTED_HOT_TEAMS
1977 p_hot_teams = &master_th->th.th_hot_teams;
1978 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1979 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1980 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1981 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1983 (*p_hot_teams)[0].hot_team_nth = 1;
1988 if (ompt_enabled.enabled) {
1989 if (ompt_enabled.ompt_callback_parallel_begin) {
1990 int team_size = master_set_numthreads
1991 ? master_set_numthreads
1992 : get__nproc_2(parent_team, master_tid);
1993 int flags = OMPT_INVOKER(call_context) |
1994 ((microtask == (microtask_t)__kmp_teams_master)
1995 ? ompt_parallel_league
1996 : ompt_parallel_team);
1997 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1998 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
2001 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2005 master_th->th.th_ident = loc;
2008 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
2009 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
2010 call_context, microtask, invoker,
2011 master_set_numthreads, level,
2013 ompt_parallel_data, return_address,
2022 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);
2026 __kmp_is_entering_teams(active_level, level, teams_level, ap);
2027 if ((!enter_teams &&
2028 (parent_team->t.t_active_level >=
2029 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
2030 (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   // TODO: get nproc directly from current task
                   : get__nproc_2(parent_team, master_tid);
    // Use the thread_limit set for the current target task if it exists, else
    // go with the deduced nthreads.
    nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
                   ? task_thread_limit
                   : nthreads;
    if (nthreads > 1) {
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads,
                                       enter_teams);
      if (nthreads == 1) {
        // Free the fork/join lock for single-thread execution; for
        // multi-thread execution it is freed after the team is created.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);
2065 master_th->th.th_set_nproc = 0;
2067 if (nthreads == 1) {
2068 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2069 invoker, master_th, parent_team,
2071 &ompt_parallel_data, &return_address,
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
2087 master_th->th.th_current_task->td_flags.executing = 0;
2089 if (!master_th->th.th_teams_microtask || level > teams_level) {
2091 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2095 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2096 if ((level + 1 < __kmp_nested_nth.used) &&
2097 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2098 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2104 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2106 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2107 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2108 proc_bind = proc_bind_false;
2112 if (proc_bind == proc_bind_default) {
2113 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2116 if (master_th->th.th_teams_microtask &&
2117 microtask == (microtask_t)__kmp_teams_master) {
2118 proc_bind = __kmp_teams_proc_bind;
2124 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2125 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2126 master_th->th.th_current_task->td_icvs.proc_bind)) {
2129 if (!master_th->th.th_teams_microtask ||
2130 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2131 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2136 master_th->th.th_set_proc_bind = proc_bind_default;
2138 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2139 kmp_internal_control_t new_icvs;
2140 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2141 new_icvs.next = NULL;
2142 if (nthreads_icv > 0) {
2143 new_icvs.nproc = nthreads_icv;
2145 if (proc_bind_icv != proc_bind_default) {
2146 new_icvs.proc_bind = proc_bind_icv;
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2177 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2178 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2179 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2180 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2181 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2183 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2186 KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
2188 if (!master_th->th.th_teams_microtask || level > teams_level) {
2189 int new_level = parent_team->t.t_level + 1;
2190 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2191 new_level = parent_team->t.t_active_level + 1;
2192 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2195 int new_level = parent_team->t.t_level;
2196 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2197 new_level = parent_team->t.t_active_level;
2198 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2200 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2202 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2204 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2205 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2208 propagateFPControl(team);
2210 if (ompd_state & OMPD_ENABLE_BP)
2211 ompd_bp_parallel_begin();
  KA_TRACE(20,
           ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
            gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
            team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }
2241 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2242 if (!root->r.r_active)
2243 root->r.r_active = TRUE;
2245 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2246 __kmp_setup_icv_copy(team, nthreads,
2247 &master_th->th.th_current_task->td_icvs, loc);
2250 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2253 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2256 if (team->t.t_active_level == 1
2257 && !master_th->th.th_teams_microtask) {
2259 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2260 (__kmp_forkjoin_frames_mode == 3 ||
2261 __kmp_forkjoin_frames_mode == 1)) {
2262 kmp_uint64 tmp_time = 0;
2263 if (__itt_get_timestamp_ptr)
2264 tmp_time = __itt_get_timestamp();
2266 master_th->th.th_frame_time = tmp_time;
2267 if (__kmp_forkjoin_frames_mode == 3)
2268 team->t.t_region_time = tmp_time;
2272 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2273 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2275 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2281 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));
2288 if (__itt_stack_caller_create_ptr) {
2291 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2292 team->t.t_stack_id = __kmp_itt_stack_caller_create();
  } else if (parent_team->t.t_serialized) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
2307 __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));
  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
2323#if KMP_STATS_ENABLED
2327 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If parallel was called directly from a teams construct, restore the state.
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2348 if (ompt_enabled.enabled) {
2349 master_th->th.ompt_thread_info.state = ompt_state_overhead;
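// OMPT helpers for the join path: restore the thread state after the region
// and emit the parallel_end callback.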
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
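/* __kmp_join_call: join the current parallel region - wait for the workers at
   the join barrier, release ITT/OMPT bookkeeping, restore the primary thread's
   state and ICVs, and handle the nested-teams cases. */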
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2396 master_th = __kmp_threads[gtid];
2397 root = master_th->th.th_root;
2398 team = master_th->th.th_team;
2399 parent_team = team->t.t_parent;
2401 master_th->th.th_ident = loc;
  void *team_microtask = (void *)team->t.t_pkfn;
2408 if (ompt_enabled.enabled &&
2409 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2410 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2415 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
2421 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
2425 if (team->t.t_serialized) {
2426 if (master_th->th.th_teams_microtask) {
2428 int level = team->t.t_level;
2429 int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
      } else if (level == tlevel + 1) {
        team->t.t_serialized++;
2444 if (ompt_enabled.enabled) {
2445 if (fork_context == fork_context_gnu) {
2446 __ompt_lw_taskteam_unlink(master_th);
2448 __kmp_join_restore_state(master_th, parent_team);
2455 master_active = team->t.t_master_active;
2460 __kmp_internal_join(loc, gtid, team);
2462 if (__itt_stack_caller_create_ptr) {
2463 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2465 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2466 team->t.t_stack_id = NULL;
2470 master_th->th.th_task_state =
2473 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2474 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2478 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2479 parent_team->t.t_stack_id = NULL;
2487 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2488 void *codeptr = team->t.ompt_team_info.master_return_address;
2493 if (team->t.t_active_level == 1 &&
2494 (!master_th->th.th_teams_microtask ||
2495 master_th->th.th_teams_size.nteams == 1)) {
2496 master_th->th.th_ident = loc;
2499 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2500 __kmp_forkjoin_frames_mode == 3)
2501 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2502 master_th->th.th_frame_time, 0, loc,
2503 master_th->th.th_team_nproc, 1);
2504 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2505 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2506 __kmp_itt_region_joined(gtid);
2510#if KMP_AFFINITY_SUPPORTED
2513 master_th->th.th_first_place = team->t.t_first_place;
2514 master_th->th.th_last_place = team->t.t_last_place;
2518 if (master_th->th.th_teams_microtask && !exit_teams &&
2519 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2520 team->t.t_level == master_th->th.th_teams_level + 1) {
2525 ompt_data_t ompt_parallel_data = ompt_data_none;
2526 if (ompt_enabled.enabled) {
2527 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2528 if (ompt_enabled.ompt_callback_implicit_task) {
2529 int ompt_team_size = team->t.t_nproc;
2530 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2531 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2532 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2534 task_info->frame.exit_frame = ompt_data_none;
2535 task_info->task_data = ompt_data_none;
2536 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2537 __ompt_lw_taskteam_unlink(master_th);
2542 team->t.t_active_level--;
2543 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2549 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2550 int old_num = master_th->th.th_team_nproc;
2551 int new_num = master_th->th.th_teams_size.nth;
2552 kmp_info_t **other_threads = team->t.t_threads;
2553 team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      for (int i = old_num; i < new_num; ++i) {
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2569 if (__kmp_tasking_mode != tskm_immediate_exec) {
2571 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2577 if (ompt_enabled.enabled) {
2578 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2579 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2587 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2588 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2590 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2595 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2597 if (!master_th->th.th_teams_microtask ||
2598 team->t.t_level > master_th->th.th_teams_level) {
2600 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2602 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2605 if (ompt_enabled.enabled) {
2606 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2607 if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
2611 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2612 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2613 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2614 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2616 task_info->frame.exit_frame = ompt_data_none;
2617 task_info->task_data = ompt_data_none;
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
2623 __kmp_pop_current_task_from_thread(master_th);
2625 master_th->th.th_def_allocator = team->t.t_def_allocator;
2628 if (ompd_state & OMPD_ENABLE_BP)
2629 ompd_bp_parallel_end();
2631 updateHWFPControl(team);
2633 if (root->r.r_active != master_active)
2634 root->r.r_active = master_active;
2636 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2644 master_th->th.th_team = parent_team;
2645 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2646 master_th->th.th_team_master = parent_team->t.t_threads[0];
2647 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2650 if (parent_team->t.t_serialized &&
2651 parent_team != master_th->th.th_serial_team &&
2652 parent_team != root->r.r_root_team) {
2653 __kmp_free_team(root,
2654 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2655 master_th->th.th_serial_team = parent_team;
2658 if (__kmp_tasking_mode != tskm_immediate_exec) {
2660 KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
2661 team->t.t_primary_task_state == 1);
2662 master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
2665 master_th->th.th_task_team =
2666 parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
2676 master_th->th.th_current_task->td_flags.executing = 1;
2678 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2680#if KMP_AFFINITY_SUPPORTED
2681 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2682 __kmp_reset_root_init_mask(gtid);
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
2690 if (ompt_enabled.enabled) {
2691 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2702void __kmp_save_internal_controls(kmp_info_t *thread) {
2704 if (thread->th.th_team != thread->th.th_serial_team) {
2707 if (thread->th.th_team->t.t_serialized > 1) {
2710 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2713 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2714 thread->th.th_team->t.t_serialized) {
2719 kmp_internal_control_t *control =
2720 (kmp_internal_control_t *)__kmp_allocate(
2721 sizeof(kmp_internal_control_t));
2723 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2725 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2727 control->next = thread->th.th_team->t.t_control_stack_top;
2728 thread->th.th_team->t.t_control_stack_top = control;
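// Illustrative sketch (assumed simplification, not the runtime's types): the
// save above is a plain push onto a singly linked stack of ICV snapshots,
// keyed by the serialized-nesting level. The struct and helper names below
// are hypothetical and exist only to show the shape of that push.
#if 0
#include <stdlib.h>

typedef struct icv_snapshot {
  int serial_nesting_level; // one snapshot per serialized level
  /* copies of the internal control variables would live here */
  struct icv_snapshot *next;
} icv_snapshot;

static void push_icvs(icv_snapshot **top, int nesting_level) {
  icv_snapshot *node = (icv_snapshot *)malloc(sizeof(icv_snapshot));
  node->serial_nesting_level = nesting_level;
  node->next = *top; // new node points at the previous top, as in the code above
  *top = node;       // the team's t_control_stack_top plays the role of *top
}
#endif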
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;
2747 thread = __kmp_threads[gtid];
2748 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2751 __kmp_save_internal_controls(thread);
2753 set__nproc(thread, new_nth);
2758 root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;
2768 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2770 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2771 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
2785#if KMP_NESTED_HOT_TEAMS
2786 if (thread->th.th_hot_teams) {
2787 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2788 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2792 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2793 hot_team->t.b->update_num_threads(new_nth);
2794 __kmp_add_threads_to_team(hot_team, new_nth);
2797 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2800 for (f = 0; f < new_nth; f++) {
2801 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2802 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2805 hot_team->t.t_size_changed = -1;
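// Illustrative sketch (user-level view, public API only): omp_set_num_threads()
// is what ultimately reaches __kmp_set_num_threads() above; shrinking the value
// lets the hot team release its surplus workers back to the thread pool. Kept
// under "#if 0" so it is not compiled here.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_num_threads(8);
#pragma omp parallel
  { /* hot team grows to up to 8 threads */ }

  omp_set_num_threads(2); // surplus hot-team threads may now be freed
#pragma omp parallel
  printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
  return 0;
}
#endif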
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
2816 KMP_DEBUG_ASSERT(__kmp_init_serial);
2819 if (max_active_levels < 0) {
2820 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
2830 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2835 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2836 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2837 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));
2847 thread = __kmp_threads[gtid];
2849 __kmp_save_internal_controls(thread);
2851 set__max_active_levels(thread, max_active_levels);
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
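// Illustrative sketch (public API only): omp_set_max_active_levels() and
// omp_get_max_active_levels() are the user-level counterparts of the two
// routines above. Not compiled here.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2); // allow two nested *active* parallel levels
  printf("max-active-levels-var = %d\n", omp_get_max_active_levels());
#pragma omp parallel num_threads(2)
#pragma omp parallel num_threads(2) // still active: nesting depth <= 2
  printf("level %d, thread %d\n", omp_get_level(), omp_get_thread_num());
  return 0;
}
#endif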
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
  }
2913 thread = __kmp_threads[gtid];
2915 __kmp_save_internal_controls(thread);
  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // chunk was not specified: keep the unchunked static schedule
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore the chunk parameter for auto schedule
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set; show this via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
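// Illustrative sketch (public API only): omp_set_schedule()/omp_get_schedule()
// drive __kmp_set_schedule()/__kmp_get_schedule() above; the switch above maps
// the internal kmp_sch_* kinds back onto the standard enumeration. Not
// compiled here.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_sched_t kind;
  int chunk;
  omp_set_schedule(omp_sched_dynamic, 4); // run-sched-var = dynamic,4
  omp_get_schedule(&kind, &chunk);
  printf("kind=%d chunk=%d\n", (int)kind, chunk); // expect kind 2, chunk 4
  return 0;
}
#endif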
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
3006 thr = __kmp_threads[gtid];
3007 team = thr->th.th_team;
3008 ii = team->t.t_level;
3012 if (thr->th.th_teams_microtask) {
3014 int tlevel = thr->th.th_teams_level;
3017 KMP_DEBUG_ASSERT(ii >= tlevel);
3029 return __kmp_tid_from_gtid(gtid);
3031 dd = team->t.t_serialized;
3033 while (ii > level) {
3034 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3036 if ((team->t.t_serialized) && (!dd)) {
3037 team = team->t.t_parent;
3041 team = team->t.t_parent;
3042 dd = team->t.t_serialized;
3047 return (dd > 1) ? (0) : (team->t.t_master_tid);
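// Illustrative sketch (public API only): omp_get_ancestor_thread_num() and
// omp_get_team_size() are the queries answered by this routine and the one
// below, walking from the current nesting level back toward level 0. Not
// compiled here.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2);
#pragma omp parallel num_threads(2)
#pragma omp parallel num_threads(2)
  printf("outer tid = %d, outer team size = %d\n",
         omp_get_ancestor_thread_num(1), omp_get_team_size(1));
  return 0;
}
#endif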
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
3064 thr = __kmp_threads[gtid];
3065 team = thr->th.th_team;
3066 ii = team->t.t_level;
3070 if (thr->th.th_teams_microtask) {
3072 int tlevel = thr->th.th_teams_level;
3075 KMP_DEBUG_ASSERT(ii >= tlevel);
3086 while (ii > level) {
3087 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3089 if (team->t.t_serialized && (!dd)) {
3090 team = team->t.t_parent;
3094 team = team->t.t_parent;
3099 return team->t.t_nproc;
3102kmp_r_sched_t __kmp_get_schedule_global() {
3107 kmp_r_sched_t r_sched;
3113 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3114 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3117 r_sched.r_sched_type = __kmp_static;
3120 r_sched.r_sched_type = __kmp_guided;
3122 r_sched.r_sched_type = __kmp_sched;
3124 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3126 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3128 r_sched.chunk = KMP_DEFAULT_CHUNK;
3130 r_sched.chunk = __kmp_chunk;
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc) {
3140 KMP_DEBUG_ASSERT(team);
3141 if (!realloc || argc > team->t.t_max_argc) {
    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);
3150 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3152 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3195 team->t.t_max_nproc = max_nth;
3198 for (i = 0; i < num_disp_buff; ++i) {
3199 team->t.t_disp_buffer[i].buffer_index = i;
3200 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3204static void __kmp_free_team_arrays(kmp_team_t *team) {
3207 for (i = 0; i < team->t.t_max_nproc; ++i) {
3208 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3209 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3210 team->t.t_dispatch[i].th_disp_buffer = NULL;
3213#if KMP_USE_HIER_SCHED
3214 __kmp_dispatch_free_hierarchies(team);
3216 __kmp_free(team->t.t_threads);
3217 __kmp_free(team->t.t_disp_buffer);
3218 __kmp_free(team->t.t_dispatch);
3219 __kmp_free(team->t.t_implicit_task_taskdata);
3220 team->t.t_threads = NULL;
3221 team->t.t_disp_buffer = NULL;
3222 team->t.t_dispatch = NULL;
3223 team->t.t_implicit_task_taskdata = 0;
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));
3237 __kmp_free(oldThreads);
static kmp_internal_control_t __kmp_get_global_icvs(void) {
3242 kmp_r_sched_t r_sched =
3243 __kmp_get_schedule_global();
3245 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3247 kmp_internal_control_t g_icvs = {
3249 (kmp_int8)__kmp_global.g.g_dynamic,
3251 (kmp_int8)__kmp_env_blocktime,
3253 __kmp_dflt_blocktime,
3258 __kmp_dflt_team_nth,
3264 __kmp_dflt_max_active_levels,
3268 __kmp_nested_proc_bind.bind_types[0],
3269 __kmp_default_device,
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;
  return gx_icvs;
}
3287static void __kmp_initialize_root(kmp_root_t *root) {
3289 kmp_team_t *root_team;
3290 kmp_team_t *hot_team;
3291 int hot_team_max_nth;
3292 kmp_r_sched_t r_sched =
3293 __kmp_get_schedule_global();
3294 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3295 KMP_DEBUG_ASSERT(root);
3296 KMP_ASSERT(!root->r.r_begin);
3299 __kmp_init_lock(&root->r.r_begin_lock);
3300 root->r.r_begin = FALSE;
3301 root->r.r_active = FALSE;
3302 root->r.r_in_parallel = 0;
3303 root->r.r_blocktime = __kmp_dflt_blocktime;
3304#if KMP_AFFINITY_SUPPORTED
3305 root->r.r_affinity_assigned = FALSE;
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3313 __kmp_allocate_team(root,
3319 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3321 USE_NESTED_HOT_ARG(NULL)
3326 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3331 root->r.r_root_team = root_team;
3332 root_team->t.t_control_stack_top = NULL;
3335 root_team->t.t_threads[0] = NULL;
3336 root_team->t.t_nproc = 1;
3337 root_team->t.t_serialized = 1;
3339 root_team->t.t_sched.sched = r_sched.sched;
3342 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3343 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3350 __kmp_allocate_team(root,
3352 __kmp_dflt_team_nth_ub * 2,
3356 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3358 USE_NESTED_HOT_ARG(NULL)
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3362 root->r.r_hot_team = hot_team;
3363 root_team->t.t_control_stack_top = NULL;
3366 hot_team->t.t_parent = root_team;
3369 hot_team_max_nth = hot_team->t.t_max_nproc;
3370 for (f = 0; f < hot_team_max_nth; ++f) {
3371 hot_team->t.t_threads[f] = NULL;
3373 hot_team->t.t_nproc = 1;
3375 hot_team->t.t_sched.sched = r_sched.sched;
3376 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
static void __kmp_print_structure_team_accum(kmp_team_list_t list,
                                             kmp_team_p const *team) {
3399 KMP_DEBUG_ASSERT(list != NULL);
3404 __kmp_print_structure_team_accum(list, team->t.t_parent);
3405 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3409 while (l->next != NULL && l->entry != team) {
3412 if (l->next != NULL) {
3418 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3424 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3425 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {
  int gtid;
  kmp_team_list_t list;

  // Initialize the list of teams.
  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
3467 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3468 __kmp_printf(
"%2d", gtid);
3469 if (__kmp_threads != NULL) {
3470 __kmp_printf(
" %p", __kmp_threads[gtid]);
3472 if (__kmp_root != NULL) {
3473 __kmp_printf(
" %p", __kmp_root[gtid]);
3480 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3482 if (__kmp_threads != NULL) {
3484 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3485 kmp_info_t
const *thread = __kmp_threads[gtid];
3486 if (thread != NULL) {
3487 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3488 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3489 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3490 __kmp_print_structure_team(
" Serial Team: ",
3491 thread->th.th_serial_team);
3492 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3493 __kmp_print_structure_thread(
" Primary: ",
3494 thread->th.th_team_master);
3495 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3496 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3497 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3498 __kmp_print_structure_thread(
" Next in pool: ",
3499 thread->th.th_next_pool);
3501 __kmp_print_structure_team_accum(list, thread->th.th_team);
3502 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3506 __kmp_printf(
"Threads array is not allocated.\n");
3510 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3512 if (__kmp_root != NULL) {
3514 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3515 kmp_root_t
const *root = __kmp_root[gtid];
3517 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3518 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3519 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3520 __kmp_print_structure_thread(
" Uber Thread: ",
3521 root->r.r_uber_thread);
3522 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3523 __kmp_printf(
" In Parallel: %2d\n",
3524 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3526 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3527 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3531 __kmp_printf(
"Ubers array is not allocated.\n");
3534 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3536 while (list->next != NULL) {
3537 kmp_team_p
const *team = list->entry;
3539 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3540 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3541 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3542 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3543 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3544 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3545 for (i = 0; i < team->t.t_nproc; ++i) {
3546 __kmp_printf(
" Thread %2d: ", i);
3547 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3549 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3555 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3557 __kmp_print_structure_thread(
"Thread pool: ",
3558 CCAST(kmp_info_t *, __kmp_thread_pool));
3559 __kmp_print_structure_team(
"Team pool: ",
3560 CCAST(kmp_team_t *, __kmp_team_pool));
3564 while (list != NULL) {
3565 kmp_team_list_item_t *item = list;
3567 KMP_INTERNAL_FREE(item);
3576static const unsigned __kmp_primes[] = {
3577 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3578 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3579 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3580 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3581 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3582 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3583 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3584 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3585 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3586 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3587 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;
3620 for (i = 0; i < __kmp_threads_capacity; ++i) {
3621 if (KMP_UBER_GTID(i) &&
3622 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3625 r += __kmp_unregister_root_other_thread(i);
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;
3661#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3664 added = __kmp_reclaim_dead_roots();
3693 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3696 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3700 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3711 KMP_MEMCPY(newThreads, __kmp_threads,
3712 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3713 KMP_MEMCPY(newRoot, __kmp_root,
3714 __kmp_threads_capacity *
sizeof(kmp_root_t *));
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;
  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else {
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }
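// Illustrative sketch (standalone model): the capacity growth rule used above
// -- keep doubling until the request fits, but never beyond the system limit.
// The helper name is hypothetical; only the arithmetic mirrors the code above.
// Not compiled here.
#if 0
#include <stdio.h>

static int grow_capacity(int capacity, int needed, int sys_max) {
  int minimum_required = capacity + needed;
  int new_capacity = capacity;
  do {
    new_capacity =
        new_capacity <= (sys_max >> 1) ? (new_capacity << 1) : sys_max;
  } while (new_capacity < minimum_required);
  return new_capacity;
}

int main(void) {
  printf("%d\n", grow_capacity(32, 5, 1 << 15));   // 64
  printf("%d\n", grow_capacity(32, 100, 1 << 15)); // 256
  return 0;
}
#endif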
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
3767 capacity = __kmp_threads_capacity;
3768 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3775 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3776 capacity -= __kmp_hidden_helper_threads_num;
3780 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3781 if (__kmp_tp_cached) {
3782 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3783 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3784 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3786 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3796 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3799 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3800 gtid <= __kmp_hidden_helper_threads_num;
3803 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
    KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
                 "hidden helper thread: T#%d\n",
                 gtid));
3811 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3814 for (gtid = __kmp_hidden_helper_threads_num + 1;
3815 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
    KA_TRACE(
        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3820 KMP_ASSERT(gtid < __kmp_threads_capacity);
3825 TCW_4(__kmp_nth, __kmp_nth + 1);
3829 if (__kmp_adjust_gtid_mode) {
3830 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3831 if (TCR_4(__kmp_gtid_mode) != 2) {
3832 TCW_4(__kmp_gtid_mode, 2);
3835 if (TCR_4(__kmp_gtid_mode) != 1) {
3836 TCW_4(__kmp_gtid_mode, 1);
3841#ifdef KMP_ADJUST_BLOCKTIME
3844 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3845 if (__kmp_nth > __kmp_avail_proc) {
3846 __kmp_zero_bt = TRUE;
3852 if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3854 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3857#if KMP_STATS_ENABLED
3859 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3860 __kmp_stats_thread_ptr->startLife();
3861 KMP_SET_THREAD_STATE(SERIAL_REGION);
3864 __kmp_initialize_root(root);
3867 if (root->r.r_uber_thread) {
3868 root_thread = root->r.r_uber_thread;
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3871 if (__kmp_storage_map) {
3872 __kmp_print_thread_storage_map(root_thread, gtid);
3874 root_thread->th.th_info.ds.ds_gtid = gtid;
3876 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3878 root_thread->th.th_root = root;
3879 if (__kmp_env_consistency_check) {
3880 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3883 __kmp_initialize_fast_memory(root_thread);
3887 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3888 __kmp_initialize_bget(root_thread);
3890 __kmp_init_random(root_thread);
3894 if (!root_thread->th.th_serial_team) {
3895 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
    root_thread->th.th_serial_team = __kmp_allocate_team(
        root, 1, 1,
        proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
    KMP_ASSERT(root_thread->th.th_serial_team);
    KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                  root_thread->th.th_serial_team));
3909 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3911 root->r.r_root_team->t.t_threads[0] = root_thread;
3912 root->r.r_hot_team->t.t_threads[0] = root_thread;
3913 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3915 root_thread->th.th_serial_team->t.t_serialized = 0;
3916 root->r.r_uber_thread = root_thread;
3919 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3920 TCW_4(__kmp_init_gtid, TRUE);
3923 __kmp_gtid_set_specific(gtid);
3926 __kmp_itt_thread_name(gtid);
3929#ifdef KMP_TDATA_GTID
3932 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3933 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
3942 for (b = 0; b < bs_last_barrier; ++b) {
3943 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3945 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3949 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3950 KMP_INIT_BARRIER_STATE);
3952#if KMP_AFFINITY_SUPPORTED
3953 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3954 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3955 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3956 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3958 root_thread->th.th_def_allocator = __kmp_def_allocator;
3959 root_thread->th.th_prev_level = 0;
3960 root_thread->th.th_prev_num_threads = 1;
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3963 tmp->cg_root = root_thread;
3964 tmp->cg_thread_limit = __kmp_cg_max_nth;
3965 tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
                 " cg_nthreads init to 1\n",
                 root_thread, tmp));
  root_thread->th.th_cg_roots = tmp;
3972 __kmp_root_counter++;
3975 if (!initial_thread && ompt_enabled.enabled) {
3977 kmp_info_t *root_thread = ompt_get_thread();
3979 ompt_set_thread_state(root_thread, ompt_state_overhead);
3981 if (ompt_enabled.ompt_callback_thread_begin) {
3982 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3983 ompt_thread_initial, __ompt_get_thread_data_internal());
3985 ompt_data_t *task_data;
3986 ompt_data_t *parallel_data;
3987 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
3989 if (ompt_enabled.ompt_callback_implicit_task) {
3990 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3991 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3994 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3998 if (ompd_state & OMPD_ENABLE_BP)
3999 ompd_bp_thread_begin();
4003 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4008#if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
4012 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4013 if (!hot_teams || !hot_teams[level].hot_team) {
4016 KMP_DEBUG_ASSERT(level < max_level);
4017 kmp_team_t *team = hot_teams[level].hot_team;
4018 nth = hot_teams[level].hot_team_nth;
4020 if (level < max_level - 1) {
4021 for (i = 0; i < nth; ++i) {
4022 kmp_info_t *th = team->t.t_threads[i];
4023 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4024 if (i > 0 && th->th.th_hot_teams) {
4025 __kmp_free(th->th.th_hot_teams);
4026 th->th.th_hot_teams = NULL;
4030 __kmp_free_team(root, team, NULL);
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4038 kmp_team_t *root_team = root->r.r_root_team;
4039 kmp_team_t *hot_team = root->r.r_hot_team;
4040 int n = hot_team->t.t_nproc;
4043 KMP_DEBUG_ASSERT(!root->r.r_active);
4045 root->r.r_root_team = NULL;
4046 root->r.r_hot_team = NULL;
4049 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4050#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level > 0) { // free nested hot teams, if any
4053 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4054 kmp_info_t *th = hot_team->t.t_threads[i];
4055 if (__kmp_hot_teams_max_level > 1) {
4056 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4058 if (th->th.th_hot_teams) {
4059 __kmp_free(th->th.th_hot_teams);
4060 th->th.th_hot_teams = NULL;
4065 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4070 if (__kmp_tasking_mode != tskm_immediate_exec) {
4071 __kmp_wait_to_unref_task_teams();
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
4081 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4085 if (ompd_state & OMPD_ENABLE_BP)
4086 ompd_bp_thread_end();
4090 ompt_data_t *task_data;
4091 ompt_data_t *parallel_data;
4092 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4094 if (ompt_enabled.ompt_callback_implicit_task) {
4095 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4096 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4098 if (ompt_enabled.ompt_callback_thread_end) {
4099 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4100 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4106 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4113 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4114 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4115 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4116 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4117 root->r.r_uber_thread->th.th_cg_roots = NULL;
4119 __kmp_reap_thread(root->r.r_uber_thread, 1);
4123 root->r.r_uber_thread = NULL;
4125 root->r.r_begin = FALSE;
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4135 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4136 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4137 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4140 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4143 kmp_root_t *root = __kmp_root[gtid];
4145 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4146 KMP_ASSERT(KMP_UBER_GTID(gtid));
4147 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4148 KMP_ASSERT(root->r.r_active == FALSE);
4152 kmp_info_t *thread = __kmp_threads[gtid];
4153 kmp_team_t *team = thread->th.th_team;
4154 kmp_task_team_t *task_team = thread->th.th_task_team;
4157 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4158 task_team->tt.tt_hidden_helper_task_encountered)) {
4161 thread->th.ompt_thread_info.state = ompt_state_undefined;
4163 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4166 __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4172 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4184 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4185 KMP_ASSERT(KMP_UBER_GTID(gtid));
4186 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4187 KMP_ASSERT(root->r.r_active == FALSE);
4189 r = __kmp_reset_root(gtid, root);
  KA_TRACE(1,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
4197void __kmp_task_info() {
4199 kmp_int32 gtid = __kmp_entry_gtid();
4200 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4201 kmp_info_t *this_thr = __kmp_threads[gtid];
4202 kmp_team_t *steam = this_thr->th.th_serial_team;
4203 kmp_team_t *team = this_thr->th.th_team;
4206 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4208 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4209 team->t.t_implicit_task_taskdata[tid].td_parent);
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
4221 KMP_DEBUG_ASSERT(this_thr != NULL);
4222 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4223 KMP_DEBUG_ASSERT(team);
4224 KMP_DEBUG_ASSERT(team->t.t_threads);
4225 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4226 kmp_info_t *master = team->t.t_threads[0];
4227 KMP_DEBUG_ASSERT(master);
4228 KMP_DEBUG_ASSERT(master->th.th_root);
4232 TCW_SYNC_PTR(this_thr->th.th_team, team);
4234 this_thr->th.th_info.ds.ds_tid = tid;
4235 this_thr->th.th_set_nproc = 0;
4236 if (__kmp_tasking_mode != tskm_immediate_exec)
4239 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4241 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4242 this_thr->th.th_set_proc_bind = proc_bind_default;
4243#if KMP_AFFINITY_SUPPORTED
4244 this_thr->th.th_new_place = this_thr->th.th_current_place;
4246 this_thr->th.th_root = master->th.th_root;
4249 this_thr->th.th_team_nproc = team->t.t_nproc;
4250 this_thr->th.th_team_master = master;
4251 this_thr->th.th_team_serialized = team->t.t_serialized;
4253 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
4267 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4269 this_thr->th.th_local.this_construct = 0;
4271 if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4279 this_thr->th.th_pri_head = NULL;
4282 if (this_thr != master &&
4283 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4285 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4286 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4289 int i = tmp->cg_nthreads--;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                   " on node %p of thread %p to %d\n",
                   this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4297 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4299 this_thr->th.th_cg_roots->cg_nthreads++;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
                   " node %p of thread %p to %d\n",
                   this_thr, this_thr->th.th_cg_roots,
                   this_thr->th.th_cg_roots->cg_root,
                   this_thr->th.th_cg_roots->cg_nthreads));
4305 this_thr->th.th_current_task->td_icvs.thread_limit =
4306 this_thr->th.th_cg_roots->cg_thread_limit;
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
4318 KMP_ASSERT(dispatch);
4319 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4320 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4322 dispatch->th_disp_index = 0;
4323 dispatch->th_doacross_buf_idx = 0;
4324 if (!dispatch->th_disp_buffer) {
4325 dispatch->th_disp_buffer =
4326 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4328 if (__kmp_storage_map) {
4329 __kmp_print_storage_map_gtid(
4330 gtid, &dispatch->th_disp_buffer[0],
4331 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4333 : __kmp_dispatch_num_buffers],
4335 "th_%d.th_dispatch.th_disp_buffer "
4336 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4337 gtid, team->t.t_id, gtid);
    memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4343 dispatch->th_dispatch_pr_current = 0;
4344 dispatch->th_dispatch_sh_current = 0;
4346 dispatch->th_deo_fcn = 0;
4347 dispatch->th_dxo_fcn = 0;
4350 this_thr->th.th_next_pool = NULL;
4352 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4353 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
4365 kmp_team_t *serial_team;
4366 kmp_info_t *new_thr;
  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4370 KMP_DEBUG_ASSERT(root && team);
4371#if !KMP_NESTED_HOT_TEAMS
4372 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4379 if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
4380 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4382 if (new_thr == __kmp_thread_pool_insert_pt) {
4383 __kmp_thread_pool_insert_pt = NULL;
4385 TCW_4(new_thr->th.th_in_pool, FALSE);
4386 __kmp_suspend_initialize_thread(new_thr);
4387 __kmp_lock_suspend_mx(new_thr);
4388 if (new_thr->th.th_active_in_pool == TRUE) {
4389 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4390 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4391 new_thr->th.th_active_in_pool = FALSE;
4393 __kmp_unlock_suspend_mx(new_thr);
    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4397 KMP_ASSERT(!new_thr->th.th_team);
4398 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4401 __kmp_initialize_info(new_thr, team, new_tid,
4402 new_thr->th.th_info.ds.ds_gtid);
4403 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4405 TCW_4(__kmp_nth, __kmp_nth + 1);
4407 new_thr->th.th_task_state = 0;
4409 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4411 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4415#ifdef KMP_ADJUST_BLOCKTIME
4418 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4419 if (__kmp_nth > __kmp_avail_proc) {
4420 __kmp_zero_bt = TRUE;
4429 kmp_balign_t *balign = new_thr->th.th_bar;
4430 for (b = 0; b < bs_last_barrier; ++b)
4431 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
    return new_thr;
  }
4442 KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);
4443 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4448 if (!TCR_4(__kmp_init_monitor)) {
4449 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4450 if (!TCR_4(__kmp_init_monitor)) {
4451 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4452 TCW_4(__kmp_init_monitor, 1);
4453 __kmp_create_monitor(&__kmp_monitor);
4454 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4465 while (TCR_4(__kmp_init_monitor) < 2) {
4468 KF_TRACE(10, (
"after monitor thread has started\n"));
4471 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                           ? 1
                           : __kmp_hidden_helper_threads_num + 1;
  for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
       ++new_gtid) {
    KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
  }
4487 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4488 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4495 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4497#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4500 __itt_suppress_mark_range(
4501 __itt_suppress_range, __itt_suppress_threading_errors,
4502 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4503 __itt_suppress_mark_range(
4504 __itt_suppress_range, __itt_suppress_threading_errors,
4505 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4507 __itt_suppress_mark_range(
4508 __itt_suppress_range, __itt_suppress_threading_errors,
4509 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4511 __itt_suppress_mark_range(__itt_suppress_range,
4512 __itt_suppress_threading_errors,
4513 &new_thr->th.th_suspend_init_count,
4514 sizeof(new_thr->th.th_suspend_init_count));
4517 __itt_suppress_mark_range(__itt_suppress_range,
4518 __itt_suppress_threading_errors,
4519 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4520 sizeof(new_thr->th.th_bar[0].bb.b_go));
4521 __itt_suppress_mark_range(__itt_suppress_range,
4522 __itt_suppress_threading_errors,
4523 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4524 sizeof(new_thr->th.th_bar[1].bb.b_go));
4525 __itt_suppress_mark_range(__itt_suppress_range,
4526 __itt_suppress_threading_errors,
4527 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4528 sizeof(new_thr->th.th_bar[2].bb.b_go));
4530 if (__kmp_storage_map) {
4531 __kmp_print_thread_storage_map(new_thr, new_gtid);
4536 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
  KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
  new_thr->th.th_serial_team = serial_team =
      (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
                                        proc_bind_default, &r_icvs,
                                        0 USE_NESTED_HOT_ARG(NULL));
4546 KMP_ASSERT(serial_team);
4547 serial_team->t.t_serialized = 0;
4549 serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));
4555 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4558 __kmp_initialize_fast_memory(new_thr);
4562 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4563 __kmp_initialize_bget(new_thr);
4566 __kmp_init_random(new_thr);
4570 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4571 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4574 kmp_balign_t *balign = new_thr->th.th_bar;
4575 for (b = 0; b < bs_last_barrier; ++b) {
4576 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4577 balign[b].bb.team = NULL;
4578 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4579 balign[b].bb.use_oncore_barrier = 0;
4582 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4583 new_thr->th.th_sleep_loc_type = flag_unset;
4585 new_thr->th.th_spin_here = FALSE;
4586 new_thr->th.th_next_waiting = 0;
  new_thr->th.th_blocking = false;
4591#if KMP_AFFINITY_SUPPORTED
4592 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4593 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4594 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4595 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4597 new_thr->th.th_def_allocator = __kmp_def_allocator;
4598 new_thr->th.th_prev_level = 0;
4599 new_thr->th.th_prev_num_threads = 1;
4601 TCW_4(new_thr->th.th_in_pool, FALSE);
4602 new_thr->th.th_active_in_pool = FALSE;
4603 TCW_4(new_thr->th.th_active, TRUE);
4611 if (__kmp_adjust_gtid_mode) {
4612 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4613 if (TCR_4(__kmp_gtid_mode) != 2) {
4614 TCW_4(__kmp_gtid_mode, 2);
4617 if (TCR_4(__kmp_gtid_mode) != 1) {
4618 TCW_4(__kmp_gtid_mode, 1);
4623#ifdef KMP_ADJUST_BLOCKTIME
4626 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4627 if (__kmp_nth > __kmp_avail_proc) {
4628 __kmp_zero_bt = TRUE;
4633#if KMP_AFFINITY_SUPPORTED
4635 __kmp_affinity_set_init_mask(new_gtid, FALSE);
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(
      10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
4661 KMP_DEBUG_ASSERT(team && new_icvs);
4662 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4663 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4665 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4667 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4668 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4683 KMP_DEBUG_ASSERT(team);
4684 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4685 KMP_DEBUG_ASSERT(team->t.t_threads);
4688 team->t.t_master_tid = 0;
4690 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4691 team->t.t_nproc = new_nproc;
4694 team->t.t_next_pool = NULL;
4698 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4699 team->t.t_invoke = NULL;
4702 team->t.t_sched.sched = new_icvs->sched.sched;
4704#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4705 team->t.t_fp_control_saved = FALSE;
4706 team->t.t_x87_fpu_control_word = 0;
4707 team->t.t_mxcsr = 0;
4710 team->t.t_construct = 0;
4712 team->t.t_ordered.dt.t_value = 0;
4713 team->t.t_master_active = FALSE;
4716 team->t.t_copypriv_data = NULL;
4719 team->t.t_copyin_counter = 0;
4722 team->t.t_control_stack_top = NULL;
4724 __kmp_reinitialize_team(team, new_icvs, loc);
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4730#if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
4733 th->th.th_first_place = first;
4734 th->th.th_last_place = last;
4735 th->th.th_new_place = newp;
4736 if (newp != th->th.th_current_place) {
4737 if (__kmp_display_affinity && team->t.t_display_affinity != 1)
4738 team->t.t_display_affinity = 1;
4740 th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4741 th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4751 if (KMP_HIDDEN_HELPER_TEAM(team))
4754 kmp_info_t *master_th = team->t.t_threads[0];
4755 KMP_DEBUG_ASSERT(master_th != NULL);
4756 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4757 int first_place = master_th->th.th_first_place;
4758 int last_place = master_th->th.th_last_place;
4759 int masters_place = master_th->th.th_current_place;
4760 int num_masks = __kmp_affinity.num_masks;
4761 team->t.t_first_place = first_place;
4762 team->t.t_last_place = last_place;
  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
  switch (proc_bind) {

  case proc_bind_default:
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;
4792 case proc_bind_close: {
4794 int n_th = team->t.t_nproc;
4796 if (first_place <= last_place) {
4797 n_places = last_place - first_place + 1;
4799 n_places = num_masks - first_place + last_place + 1;
4801 if (n_th <= n_places) {
4802 int place = masters_place;
4803 for (f = 1; f < n_th; f++) {
4804 kmp_info_t *th = team->t.t_threads[f];
4805 KMP_DEBUG_ASSERT(th != NULL);
4807 if (place == last_place) {
4808 place = first_place;
4809 }
else if (place == (num_masks - 1)) {
4814 __kmp_set_thread_place(team, th, first_place, last_place, place);
4816 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4817 "partition = [%d,%d]\n",
4818 __kmp_gtid_from_thread(team->t.t_threads[f]),
4819 team->t.t_id, f, place, first_place, last_place));
4822 int S, rem, gap, s_count;
4823 S = n_th / n_places;
4825 rem = n_th - (S * n_places);
4826 gap = rem > 0 ? n_places / rem : n_places;
4827 int place = masters_place;
4829 for (f = 0; f < n_th; f++) {
4830 kmp_info_t *th = team->t.t_threads[f];
4831 KMP_DEBUG_ASSERT(th != NULL);
4833 __kmp_set_thread_place(team, th, first_place, last_place, place);
4836 if ((s_count == S) && rem && (gap_ct == gap)) {
4838 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4840 if (place == last_place) {
4841 place = first_place;
4842 }
else if (place == (num_masks - 1)) {
4850 }
else if (s_count == S) {
4851 if (place == last_place) {
4852 place = first_place;
4853 }
else if (place == (num_masks - 1)) {
4863 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4864 "partition = [%d,%d]\n",
4865 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4866 th->th.th_new_place, first_place, last_place));
4868 KMP_DEBUG_ASSERT(place == masters_place);
4872 case proc_bind_spread: {
4874 int n_th = team->t.t_nproc;
4877 if (first_place <= last_place) {
4878 n_places = last_place - first_place + 1;
4880 n_places = num_masks - first_place + last_place + 1;
4882 if (n_th <= n_places) {
4885 if (n_places != num_masks) {
4886 int S = n_places / n_th;
4887 int s_count, rem, gap, gap_ct;
4889 place = masters_place;
4890 rem = n_places - n_th * S;
4891 gap = rem ? n_th / rem : 1;
4894 if (update_master_only == 1)
4896 for (f = 0; f < thidx; f++) {
4897 kmp_info_t *th = team->t.t_threads[f];
4898 KMP_DEBUG_ASSERT(th != NULL);
4900 int fplace = place, nplace = place;
4902 while (s_count < S) {
4903 if (place == last_place) {
4904 place = first_place;
4905 }
else if (place == (num_masks - 1)) {
4912 if (rem && (gap_ct == gap)) {
4913 if (place == last_place) {
4914 place = first_place;
4915 }
else if (place == (num_masks - 1)) {
4923 __kmp_set_thread_place(team, th, fplace, place, nplace);
4926 if (place == last_place) {
4927 place = first_place;
4928 }
else if (place == (num_masks - 1)) {
4935 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4936 "partition = [%d,%d], num_masks: %u\n",
4937 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4938 f, th->th.th_new_place, th->th.th_first_place,
4939 th->th.th_last_place, num_masks));
4945 double current =
static_cast<double>(masters_place);
4947 (
static_cast<double>(n_places + 1) /
static_cast<double>(n_th));
4952 if (update_master_only == 1)
4954 for (f = 0; f < thidx; f++) {
4955 first =
static_cast<int>(current);
4956 last =
static_cast<int>(current + spacing) - 1;
4957 KMP_DEBUG_ASSERT(last >= first);
4958 if (first >= n_places) {
4959 if (masters_place) {
4962 if (first == (masters_place + 1)) {
4963 KMP_DEBUG_ASSERT(f == n_th);
4966 if (last == masters_place) {
4967 KMP_DEBUG_ASSERT(f == (n_th - 1));
4971 KMP_DEBUG_ASSERT(f == n_th);
4976 if (last >= n_places) {
4977 last = (n_places - 1);
4982 KMP_DEBUG_ASSERT(0 <= first);
4983 KMP_DEBUG_ASSERT(n_places > first);
4984 KMP_DEBUG_ASSERT(0 <= last);
4985 KMP_DEBUG_ASSERT(n_places > last);
4986 KMP_DEBUG_ASSERT(last_place >= first_place);
4987 th = team->t.t_threads[f];
4988 KMP_DEBUG_ASSERT(th);
4989 __kmp_set_thread_place(team, th, first, last, place);
4991 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4992 "partition = [%d,%d], spacing = %.4f\n",
4993 __kmp_gtid_from_thread(team->t.t_threads[f]),
4994 team->t.t_id, f, th->th.th_new_place,
4995 th->th.th_first_place, th->th.th_last_place, spacing));
4999 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5001 int S, rem, gap, s_count;
5002 S = n_th / n_places;
5004 rem = n_th - (S * n_places);
5005 gap = rem > 0 ? n_places / rem : n_places;
5006 int place = masters_place;
5009 if (update_master_only == 1)
5011 for (f = 0; f < thidx; f++) {
5012 kmp_info_t *th = team->t.t_threads[f];
5013 KMP_DEBUG_ASSERT(th != NULL);
5015 __kmp_set_thread_place(team, th, place, place, place);
5018 if ((s_count == S) && rem && (gap_ct == gap)) {
5020 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5022 if (place == last_place) {
5023 place = first_place;
5024 }
else if (place == (num_masks - 1)) {
5032 }
else if (s_count == S) {
5033 if (place == last_place) {
5034 place = first_place;
5035 }
else if (place == (num_masks - 1)) {
5044 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5045 "partition = [%d,%d]\n",
5046 __kmp_gtid_from_thread(team->t.t_threads[f]),
5047 team->t.t_id, f, th->th.th_new_place,
5048 th->th.th_first_place, th->th.th_last_place));
5050 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}

#endif // KMP_AFFINITY_SUPPORTED
/* Allocate a new team data structure to use; take one off the free pool if
   available. */
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
  KMP_MB();
5085#if KMP_NESTED_HOT_TEAMS
5086 kmp_hot_team_ptr_t *hot_teams;
5088 team = master->th.th_team;
5089 level = team->t.t_active_level;
5090 if (master->th.th_teams_microtask) {
5091 if (master->th.th_teams_size.nteams > 1 &&
5094 (microtask_t)__kmp_teams_master ||
5095 master->th.th_teams_level <
5102 if ((master->th.th_teams_size.nteams == 1 &&
5103 master->th.th_teams_level >= team->t.t_level) ||
5104 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5105 do_place_partition = 0;
5107 hot_teams = master->th.th_hot_teams;
5108 if (level < __kmp_hot_teams_max_level && hot_teams &&
5109 hot_teams[level].hot_team) {
5117 KMP_DEBUG_ASSERT(new_nproc == 1);
5121 if (use_hot_team && new_nproc > 1) {
5122 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5123#if KMP_NESTED_HOT_TEAMS
5124 team = hot_teams[level].hot_team;
5126 team = root->r.r_hot_team;
5129 if (__kmp_tasking_mode != tskm_immediate_exec) {
5130 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5131 "task_team[1] = %p before reinit\n",
5132 team->t.t_task_team[0], team->t.t_task_team[1]));
5136 if (team->t.t_nproc != new_nproc &&
5137 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5139 int old_nthr = team->t.t_nproc;
5140 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5145 if (do_place_partition == 0)
5146 team->t.t_proc_bind = proc_bind_default;
5150 if (team->t.t_nproc == new_nproc) {
5151 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5154 if (team->t.t_size_changed == -1) {
5155 team->t.t_size_changed = 1;
5157 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5161 kmp_r_sched_t new_sched = new_icvs->sched;
5163 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5165 __kmp_reinitialize_team(team, new_icvs,
5166 root->r.r_uber_thread->th.th_ident);
5168 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5169 team->t.t_threads[0], team));
5170 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5172#if KMP_AFFINITY_SUPPORTED
5173 if ((team->t.t_size_changed == 0) &&
5174 (team->t.t_proc_bind == new_proc_bind)) {
5175 if (new_proc_bind == proc_bind_spread) {
5176 if (do_place_partition) {
5178 __kmp_partition_places(team, 1);
5181 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5182 "proc_bind = %d, partition = [%d,%d]\n",
5183 team->t.t_id, new_proc_bind, team->t.t_first_place,
5184 team->t.t_last_place));
5186 if (do_place_partition) {
5187 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5188 __kmp_partition_places(team);
5192 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5194 }
else if (team->t.t_nproc > new_nproc) {
5196 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5199 team->t.t_size_changed = 1;
5200 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5203 __kmp_add_threads_to_team(team, new_nproc);
5207 if (__kmp_tasking_mode != tskm_immediate_exec) {
5208 for (f = new_nproc; f < team->t.t_nproc; f++) {
5209 kmp_info_t *th = team->t.t_threads[f];
5210 KMP_DEBUG_ASSERT(th);
5211 th->th.th_task_team = NULL;
5214#if KMP_NESTED_HOT_TEAMS
5215 if (__kmp_hot_teams_mode == 0) {
5218 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5219 hot_teams[level].hot_team_nth = new_nproc;
5222 for (f = new_nproc; f < team->t.t_nproc; f++) {
5223 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5224 __kmp_free_thread(team->t.t_threads[f]);
5225 team->t.t_threads[f] = NULL;
5227#if KMP_NESTED_HOT_TEAMS
5232 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5233 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5234 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5235 for (
int b = 0; b < bs_last_barrier; ++b) {
5236 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5237 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5239 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5244 team->t.t_nproc = new_nproc;
5246 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5247 __kmp_reinitialize_team(team, new_icvs,
5248 root->r.r_uber_thread->th.th_ident);
5251 for (f = 0; f < new_nproc; ++f) {
5252 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5257 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5258 team->t.t_threads[0], team));
5260 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5263 for (f = 0; f < team->t.t_nproc; f++) {
5264 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5265 team->t.t_threads[f]->th.th_team_nproc ==
5270 if (do_place_partition) {
5271 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5272#if KMP_AFFINITY_SUPPORTED
5273 __kmp_partition_places(team);
5279 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5281 int old_nproc = team->t.t_nproc;
5282 team->t.t_size_changed = 1;
5284#if KMP_NESTED_HOT_TEAMS
5285 int avail_threads = hot_teams[level].hot_team_nth;
5286 if (new_nproc < avail_threads)
5287 avail_threads = new_nproc;
5288 kmp_info_t **other_threads = team->t.t_threads;
5289 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5293 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5294 for (b = 0; b < bs_last_barrier; ++b) {
5295 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5296 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5298 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5302 if (hot_teams[level].hot_team_nth >= new_nproc) {
5305 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5306 team->t.t_nproc = new_nproc;
5310 team->t.t_nproc = hot_teams[level].hot_team_nth;
5311 hot_teams[level].hot_team_nth = new_nproc;
5313 if (team->t.t_max_nproc < new_nproc) {
5315 __kmp_reallocate_team_arrays(team, new_nproc);
5316 __kmp_reinitialize_team(team, new_icvs, NULL);
5319#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5320 KMP_AFFINITY_SUPPORTED
5326 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5330 for (f = team->t.t_nproc; f < new_nproc; f++) {
5331 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5332 KMP_DEBUG_ASSERT(new_worker);
5333 team->t.t_threads[f] = new_worker;
5336 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5337 "join=%llu, plain=%llu\n",
5338 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5339 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5340 team->t.t_bar[bs_plain_barrier].b_arrived));
5344 kmp_balign_t *balign = new_worker->th.th_bar;
5345 for (b = 0; b < bs_last_barrier; ++b) {
5346 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5347 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5348 KMP_BARRIER_PARENT_FLAG);
5350 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5356#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5357 KMP_AFFINITY_SUPPORTED
5359 new_temp_affinity.restore();
5361#if KMP_NESTED_HOT_TEAMS
5364 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5367 __kmp_add_threads_to_team(team, new_nproc);
5371 __kmp_initialize_team(team, new_nproc, new_icvs,
5372 root->r.r_uber_thread->th.th_ident);
5375 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5376 for (f = 0; f < team->t.t_nproc; ++f)
5377 __kmp_initialize_info(team->t.t_threads[f], team, f,
5378 __kmp_gtid_from_tid(f, team));
5381 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5382 for (f = old_nproc; f < team->t.t_nproc; ++f)
5383 team->t.t_threads[f]->th.th_task_state = old_state;
5386 for (f = 0; f < team->t.t_nproc; ++f) {
5387 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5388 team->t.t_threads[f]->th.th_team_nproc ==
5393 if (do_place_partition) {
5394 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5395#if KMP_AFFINITY_SUPPORTED
5396 __kmp_partition_places(team);
5401 kmp_info_t *master = team->t.t_threads[0];
5402 if (master->th.th_teams_microtask) {
5403 for (f = 1; f < new_nproc; ++f) {
5405 kmp_info_t *thr = team->t.t_threads[f];
5406 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5407 thr->th.th_teams_level = master->th.th_teams_level;
5408 thr->th.th_teams_size = master->th.th_teams_size;
5411#if KMP_NESTED_HOT_TEAMS
5415 for (f = 1; f < new_nproc; ++f) {
5416 kmp_info_t *thr = team->t.t_threads[f];
5418 kmp_balign_t *balign = thr->th.th_bar;
5419 for (b = 0; b < bs_last_barrier; ++b) {
5420 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5421 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5423 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5431 __kmp_alloc_argv_entries(argc, team, TRUE);
5432 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5436 KF_TRACE(10, (
" hot_team = %p\n", team));
5439 if (__kmp_tasking_mode != tskm_immediate_exec) {
5440 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5441 "task_team[1] = %p after reinit\n",
5442 team->t.t_task_team[0], team->t.t_task_team[1]));
5447 __ompt_team_assign_id(team, ompt_parallel_data);
5457 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5460 if (team->t.t_max_nproc >= max_nproc) {
5462 __kmp_team_pool = team->t.t_next_pool;
5464 if (max_nproc > 1 &&
5465 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5467 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5472 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5474 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5475 "task_team[1] %p to NULL\n",
5476 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5477 team->t.t_task_team[0] = NULL;
5478 team->t.t_task_team[1] = NULL;
5481 __kmp_alloc_argv_entries(argc, team, TRUE);
5482 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5485 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5486 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5489 for (b = 0; b < bs_last_barrier; ++b) {
5490 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5492 team->t.t_bar[b].b_master_arrived = 0;
5493 team->t.t_bar[b].b_team_arrived = 0;
5498 team->t.t_proc_bind = new_proc_bind;
5500 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5504 __ompt_team_assign_id(team, ompt_parallel_data);
5516 team = __kmp_reap_team(team);
5517 __kmp_team_pool = team;
5522 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5525 team->t.t_max_nproc = max_nproc;
5526 if (max_nproc > 1 &&
5527 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5529 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5534 __kmp_allocate_team_arrays(team, max_nproc);
5536 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5537 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5539 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5541 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5542 team->t.t_task_team[0] = NULL;
5544 team->t.t_task_team[1] = NULL;
5547 if (__kmp_storage_map) {
5548 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5552 __kmp_alloc_argv_entries(argc, team, FALSE);
5553 team->t.t_argc = argc;
5556 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5557 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  for (b = 0; b < bs_last_barrier; ++b) {
    team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
    team->t.t_bar[b].b_master_arrived = 0;
    team->t.t_bar[b].b_team_arrived = 0;
  }

  team->t.t_proc_bind = new_proc_bind;

#if OMPT_SUPPORT
  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;
#endif

  KMP_MB();

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
  return team;
}
/* Free the team.  Return it to the team pool.  Release all the threads
   associated with it. */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
5602#if KMP_NESTED_HOT_TEAMS
5605 level = team->t.t_active_level - 1;
5606 if (master->th.th_teams_microtask) {
5607 if (master->th.th_teams_size.nteams > 1) {
5611 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5612 master->th.th_teams_level == team->t.t_level) {
5618 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5620 if (level < __kmp_hot_teams_max_level) {
5621 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5628 TCW_SYNC_PTR(team->t.t_pkfn,
5631 team->t.t_copyin_counter = 0;
5636 if (!use_hot_team) {
5637 if (__kmp_tasking_mode != tskm_immediate_exec) {
5639 for (f = 1; f < team->t.t_nproc; ++f) {
5640 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5641 kmp_info_t *th = team->t.t_threads[f];
5642 volatile kmp_uint32 *state = &th->th.th_reap_state;
5643 while (*state != KMP_SAFE_TO_REAP) {
5647 if (!__kmp_is_thread_alive(th, &ecode)) {
5648 *state = KMP_SAFE_TO_REAP;
5653 if (th->th.th_sleep_loc)
5654 __kmp_null_resume_wrapper(th);
5661 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5662 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5663 if (task_team != NULL) {
5664 for (f = 0; f < team->t.t_nproc; ++f) {
5665 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5666 team->t.t_threads[f]->th.th_task_team = NULL;
5670 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5671 __kmp_get_gtid(), task_team, team->t.t_id));
5672#if KMP_NESTED_HOT_TEAMS
5673 __kmp_free_task_team(master, task_team);
5675 team->t.t_task_team[tt_idx] = NULL;
5681 team->t.t_parent = NULL;
5682 team->t.t_level = 0;
5683 team->t.t_active_level = 0;
5686 for (f = 1; f < team->t.t_nproc; ++f) {
5687 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5688 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5689 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5692 __kmp_free_thread(team->t.t_threads[f]);
5695 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5698 team->t.b->go_release();
5699 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5700 for (f = 1; f < team->t.t_nproc; ++f) {
5701 if (team->t.b->sleep[f].sleep) {
5702 __kmp_atomic_resume_64(
5703 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5704 (kmp_atomic_flag_64<> *)NULL);
      for (int f = 1; f < team->t.t_nproc; ++f) {
5710 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5716 for (f = 1; f < team->t.t_nproc; ++f) {
5717 team->t.t_threads[f] = NULL;
5720 if (team->t.t_max_nproc > 1 &&
5721 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5722 distributedBarrier::deallocate(team->t.b);
5727 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
5731 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5732 team->t.t_threads[1]->th.th_cg_roots);
5733 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5735 for (f = 1; f < team->t.t_nproc; ++f) {
5736 kmp_info_t *thr = team->t.t_threads[f];
5737 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5738 thr->th.th_cg_roots->cg_root == thr);
5740 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5741 thr->th.th_cg_roots = tmp->up;
5742 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5743 " up to node %p. cg_nthreads was %d\n",
5744 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5745 int i = tmp->cg_nthreads--;
5750 if (thr->th.th_cg_roots)
5751 thr->th.th_current_task->td_icvs.thread_limit =
5752 thr->th.th_cg_roots->cg_thread_limit;
/* Reap the team.  Destroy it, reclaim all its resources and free its memory. */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* free stuff */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
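// __kmp_reap_team() returns the next team in the free list so callers can
// keep walking the pool while destroying it, as in __kmp_internal_end():
//   team = __kmp_reap_team(team);
//   __kmp_team_pool = team;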
// Free the thread.  Don't reap it, just place it on the pool of available
// threads.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving thread to pool, switch thread to wait on own b_go flag, and
  // uninitialized (NULL team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5830 TCW_PTR(this_th->th.th_team, NULL);
5831 TCW_PTR(this_th->th.th_root, NULL);
5832 TCW_PTR(this_th->th.th_dispatch, NULL);
5834 while (this_th->th.th_cg_roots) {
5835 this_th->th.th_cg_roots->cg_nthreads--;
5836 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5837 " %p of thread %p to %d\n",
5838 this_th, this_th->th.th_cg_roots,
5839 this_th->th.th_cg_roots->cg_root,
5840 this_th->th.th_cg_roots->cg_nthreads));
5841 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5842 if (tmp->cg_root == this_th) {
5843 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5845 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5846 this_th->th.th_cg_roots = tmp->up;
5849 if (tmp->cg_nthreads == 0) {
5852 this_th->th.th_cg_roots = NULL;
5862 __kmp_free_implicit_task(this_th);
5863 this_th->th.th_current_task = NULL;
5867 gtid = this_th->th.th_info.ds.ds_gtid;
5868 if (__kmp_thread_pool_insert_pt != NULL) {
5869 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5870 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5871 __kmp_thread_pool_insert_pt = NULL;
5880 if (__kmp_thread_pool_insert_pt != NULL) {
5881 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5883 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5885 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5886 scan = &((*scan)->th.th_next_pool))
5891 TCW_PTR(this_th->th.th_next_pool, *scan);
5892 __kmp_thread_pool_insert_pt = *scan = this_th;
5893 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5894 (this_th->th.th_info.ds.ds_gtid <
5895 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5896 TCW_4(this_th->th.th_in_pool, TRUE);
5897 __kmp_suspend_initialize_thread(this_th);
5898 __kmp_lock_suspend_mx(this_th);
5899 if (this_th->th.th_active == TRUE) {
5900 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5901 this_th->th.th_active_in_pool = TRUE;
5905 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5908 __kmp_unlock_suspend_mx(this_th);
5910 TCW_4(__kmp_nth, __kmp_nth - 1);
#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
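// Freed threads are linked into __kmp_thread_pool in increasing gtid order;
// __kmp_thread_pool_insert_pt remembers the last insertion point so that a
// later __kmp_free_thread() call does not have to rescan the pool from the
// head when gtids arrive roughly in order.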
/* ------------------------------------------------------------------------ */

void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  // TODO: add a configuration option for time granularity
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KMP_MB();
  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5943 if (__kmp_env_consistency_check) {
5944 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5948 if (ompd_state & OMPD_ENABLE_BP)
5949 ompd_bp_thread_begin();
5953 ompt_data_t *thread_data =
nullptr;
5954 if (ompt_enabled.enabled) {
5955 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5956 *thread_data = ompt_data_none;
5958 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5959 this_thr->th.ompt_thread_info.wait_id = 0;
5960 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5961 this_thr->th.ompt_thread_info.parallel_flags = 0;
5962 if (ompt_enabled.ompt_callback_thread_begin) {
5963 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5964 ompt_thread_worker, thread_data);
5966 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5971 while (!TCR_4(__kmp_global.g.g_done)) {
5972 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5976 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5979 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5982 if (ompt_enabled.enabled) {
5983 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5987 pteam = &this_thr->th.th_team;
5990 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5992 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5995 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5996 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5997 (*pteam)->t.t_pkfn));
5999 updateHWFPControl(*pteam);
6002 if (ompt_enabled.enabled) {
6003 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6007 rc = (*pteam)->t.t_invoke(gtid);
6011 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6012 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6013 (*pteam)->t.t_pkfn));
6016 if (ompt_enabled.enabled) {
6018 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6020 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6024 __kmp_join_barrier(gtid);
6029 if (ompd_state & OMPD_ENABLE_BP)
6030 ompd_bp_thread_end();
6034 if (ompt_enabled.ompt_callback_thread_end) {
6035 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6039 this_thr->th.th_task_team = NULL;
6041 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
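// The worker loop in __kmp_launch_thread() alternates fork barrier ->
// t_invoke(microtask) -> join barrier until __kmp_global.g.g_done is set;
// when OMPT is enabled, thread-begin/thread-end callbacks bracket the loop
// and the thread state toggles between idle, overhead and work_parallel.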
/* these are to be used only by the runtime */
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
   * because 0 is reserved for the nothing-stored case */

  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] when the atexit handler is called, there may still be more than
   one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
6107static void __kmp_reap_thread(kmp_info_t *thread,
int is_root) {
6112 KMP_DEBUG_ASSERT(thread != NULL);
6114 gtid = thread->th.th_info.ds.ds_gtid;
6117 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6120 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6122 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6124 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6126 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6130 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6132 __kmp_release_64(&flag);
6137 __kmp_reap_worker(thread);
6149 if (thread->th.th_active_in_pool) {
6150 thread->th.th_active_in_pool = FALSE;
6151 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6152 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6156 __kmp_free_implicit_task(thread);
6160 __kmp_free_fast_memory(thread);
6163 __kmp_suspend_uninitialize_thread(thread);
6165 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6166 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6171#ifdef KMP_ADJUST_BLOCKTIME
6174 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6175 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6176 if (__kmp_nth <= __kmp_avail_proc) {
6177 __kmp_zero_bt = FALSE;
6183 if (__kmp_env_consistency_check) {
6184 if (thread->th.th_cons) {
6185 __kmp_free_cons_stack(thread->th.th_cons);
6186 thread->th.th_cons = NULL;
6190 if (thread->th.th_pri_common != NULL) {
6191 __kmp_free(thread->th.th_pri_common);
6192 thread->th.th_pri_common = NULL;
6196 if (thread->th.th_local.bget_data != NULL) {
6197 __kmp_finalize_bget(thread);
6201#if KMP_AFFINITY_SUPPORTED
6202 if (thread->th.th_affin_mask != NULL) {
6203 KMP_CPU_FREE(thread->th.th_affin_mask);
6204 thread->th.th_affin_mask = NULL;
#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif
  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();
}
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}
6248static void __kmp_internal_end(
void) {
6252 __kmp_unregister_library();
6259 __kmp_reclaim_dead_roots();
6263 for (i = 0; i < __kmp_threads_capacity; i++)
6265 if (__kmp_root[i]->r.r_active)
6268 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6270 if (i < __kmp_threads_capacity) {
6282 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6283 if (TCR_4(__kmp_init_monitor)) {
6284 __kmp_reap_monitor(&__kmp_monitor);
6285 TCW_4(__kmp_init_monitor, 0);
6287 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6288 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6294 for (i = 0; i < __kmp_threads_capacity; i++) {
6295 if (__kmp_root[i]) {
6298 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6307 while (__kmp_thread_pool != NULL) {
6309 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6310 __kmp_thread_pool = thread->th.th_next_pool;
6312 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6313 thread->th.th_next_pool = NULL;
6314 thread->th.th_in_pool = FALSE;
6315 __kmp_reap_thread(thread, 0);
6317 __kmp_thread_pool_insert_pt = NULL;
6320 while (__kmp_team_pool != NULL) {
6322 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6323 __kmp_team_pool = team->t.t_next_pool;
6325 team->t.t_next_pool = NULL;
6326 __kmp_reap_team(team);
6329 __kmp_reap_task_teams();
6336 for (i = 0; i < __kmp_threads_capacity; i++) {
6337 kmp_info_t *thr = __kmp_threads[i];
6338 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6343 for (i = 0; i < __kmp_threads_capacity; ++i) {
6350 TCW_SYNC_4(__kmp_init_common, FALSE);
6352 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6360 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6361 if (TCR_4(__kmp_init_monitor)) {
6362 __kmp_reap_monitor(&__kmp_monitor);
6363 TCW_4(__kmp_init_monitor, 0);
6365 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6366 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6369 TCW_4(__kmp_init_gtid, FALSE);
6378void __kmp_internal_end_library(
int gtid_req) {
6385 if (__kmp_global.g.g_abort) {
6386 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6390 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6391 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6396 if (TCR_4(__kmp_init_hidden_helper) &&
6397 !TCR_4(__kmp_hidden_helper_team_done)) {
6398 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6400 __kmp_hidden_helper_main_thread_release();
6402 __kmp_hidden_helper_threads_deinitz_wait();
6408 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6410 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6411 if (gtid == KMP_GTID_SHUTDOWN) {
6412 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6413 "already shutdown\n"));
6415 }
else if (gtid == KMP_GTID_MONITOR) {
6416 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6417 "registered, or system shutdown\n"));
6419 }
else if (gtid == KMP_GTID_DNE) {
6420 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6423 }
else if (KMP_UBER_GTID(gtid)) {
6425 if (__kmp_root[gtid]->r.r_active) {
6426 __kmp_global.g.g_abort = -1;
6427 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6428 __kmp_unregister_library();
6430 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6434 __kmp_itthash_clean(__kmp_threads[gtid]);
6437 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6438 __kmp_unregister_root_current_thread(gtid);
6445#ifdef DUMP_DEBUG_ON_EXIT
6446 if (__kmp_debug_buf)
6447 __kmp_dump_debug_buffer();
6452 __kmp_unregister_library();
6457 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6460 if (__kmp_global.g.g_abort) {
6461 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6463 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6466 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6467 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6476 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6479 __kmp_internal_end();
6481 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6482 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6484 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6486#ifdef DUMP_DEBUG_ON_EXIT
6487 if (__kmp_debug_buf)
6488 __kmp_dump_debug_buffer();
6492 __kmp_close_console();
6495 __kmp_fini_allocator();
6499void __kmp_internal_end_thread(
int gtid_req) {
6508 if (__kmp_global.g.g_abort) {
6509 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6513 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6514 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6519 if (TCR_4(__kmp_init_hidden_helper) &&
6520 !TCR_4(__kmp_hidden_helper_team_done)) {
6521 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6523 __kmp_hidden_helper_main_thread_release();
6525 __kmp_hidden_helper_threads_deinitz_wait();
6532 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6534 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6535 if (gtid == KMP_GTID_SHUTDOWN) {
6536 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6537 "already shutdown\n"));
6539 }
else if (gtid == KMP_GTID_MONITOR) {
6540 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6541 "registered, or system shutdown\n"));
6543 }
else if (gtid == KMP_GTID_DNE) {
6544 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6548 }
else if (KMP_UBER_GTID(gtid)) {
6550 if (__kmp_root[gtid]->r.r_active) {
6551 __kmp_global.g.g_abort = -1;
6552 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6554 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6558 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6560 __kmp_unregister_root_current_thread(gtid);
6564 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6567 __kmp_threads[gtid]->th.th_task_team = NULL;
6571 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6577 if (__kmp_pause_status != kmp_hard_paused)
6581 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6586 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6589 if (__kmp_global.g.g_abort) {
6590 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6592 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6595 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6596 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6607 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6609 for (i = 0; i < __kmp_threads_capacity; ++i) {
6610 if (KMP_UBER_GTID(i)) {
6613 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6614 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6615 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6622 __kmp_internal_end();
6624 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6625 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6627 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6629#ifdef DUMP_DEBUG_ON_EXIT
6630 if (__kmp_debug_buf)
6631 __kmp_dump_debug_buffer();
// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
// macOS* complains about name being too long with additional getuid()
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
} // __kmp_reg_status_name
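// The registration value produced below has the form
// "<flag address>-<flag value>-<library file>", e.g. (illustrative only)
// "0x7f12a3b4c5d0-cafe1234-libomp.so"; __kmp_register_library_startup()
// later splits it back apart on '-' to decide whether the registering
// library is still alive.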
#if defined(KMP_USE_SHM)
bool __kmp_shm_available = false;
bool __kmp_tmp_available = false;
// If /dev/shm is not accessible, we will create a temporary file under /tmp.
char *temp_reg_status_file_name = nullptr;
#endif
void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  union {
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6688#if defined(KMP_USE_SHM)
6689 char *shm_name =
nullptr;
6690 char *data1 =
nullptr;
6691 __kmp_shm_available = __kmp_detect_shm();
6692 if (__kmp_shm_available) {
6694 shm_name = __kmp_str_format(
"/%s", name);
6695 int shm_preexist = 0;
6696 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6697 if ((fd1 == -1) && (errno == EEXIST)) {
6700 fd1 = shm_open(shm_name, O_RDWR, 0600);
6702 KMP_WARNING(FunctionError,
"Can't open SHM");
6703 __kmp_shm_available =
false;
6708 if (__kmp_shm_available && shm_preexist == 0) {
6709 if (ftruncate(fd1, SHM_SIZE) == -1) {
6710 KMP_WARNING(FunctionError,
"Can't set size of SHM");
6711 __kmp_shm_available =
false;
6714 if (__kmp_shm_available) {
6715 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6717 if (data1 == MAP_FAILED) {
6718 KMP_WARNING(FunctionError,
"Can't map SHM");
6719 __kmp_shm_available =
false;
6722 if (__kmp_shm_available) {
6723 if (shm_preexist == 0) {
6724 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6727 value = __kmp_str_format(
"%s", data1);
6728 munmap(data1, SHM_SIZE);
6733 if (!__kmp_shm_available)
6734 __kmp_tmp_available = __kmp_detect_tmp();
6735 if (!__kmp_shm_available && __kmp_tmp_available) {
6742 temp_reg_status_file_name = __kmp_str_format(
"/tmp/%s", name);
6743 int tmp_preexist = 0;
6744 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6745 if ((fd1 == -1) && (errno == EEXIST)) {
6748 fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
6750 KMP_WARNING(FunctionError,
"Can't open TEMP");
6751 __kmp_tmp_available =
false;
6756 if (__kmp_tmp_available && tmp_preexist == 0) {
6758 if (ftruncate(fd1, SHM_SIZE) == -1) {
6759 KMP_WARNING(FunctionError,
"Can't set size of /tmp file");
6760 __kmp_tmp_available =
false;
6763 if (__kmp_tmp_available) {
6764 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6766 if (data1 == MAP_FAILED) {
6767 KMP_WARNING(FunctionError,
"Can't map /tmp");
6768 __kmp_tmp_available =
false;
6771 if (__kmp_tmp_available) {
6772 if (tmp_preexist == 0) {
6773 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6776 value = __kmp_str_format(
"%s", data1);
6777 munmap(data1, SHM_SIZE);
6782 if (!__kmp_shm_available && !__kmp_tmp_available) {
6785 __kmp_env_set(name, __kmp_registration_str, 0);
6787 value = __kmp_env_get(name);
6791 __kmp_env_set(name, __kmp_registration_str, 0);
6793 value = __kmp_env_get(name);
6796 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6803 char *flag_addr_str = NULL;
6804 char *flag_val_str = NULL;
6805 char const *file_name = NULL;
6806 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6807 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6810 unsigned long *flag_addr = 0;
6811 unsigned long flag_val = 0;
6812 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6813 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6814 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6818 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6832 file_name =
"unknown library";
6837 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6838 if (!__kmp_str_match_true(duplicate_ok)) {
6840 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6841 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6843 KMP_INTERNAL_FREE(duplicate_ok);
6844 __kmp_duplicate_library_ok = 1;
6849#if defined(KMP_USE_SHM)
6850 if (__kmp_shm_available) {
6851 shm_unlink(shm_name);
6852 }
else if (__kmp_tmp_available) {
6853 unlink(temp_reg_status_file_name);
6856 __kmp_env_unset(name);
6860 __kmp_env_unset(name);
6864 KMP_DEBUG_ASSERT(0);
6868 KMP_INTERNAL_FREE((
void *)value);
6869#if defined(KMP_USE_SHM)
6871 KMP_INTERNAL_FREE((
void *)shm_name);
6874 KMP_INTERNAL_FREE((
void *)name);
6878void __kmp_unregister_library(
void) {
6880 char *name = __kmp_reg_status_name();
6883#if defined(KMP_USE_SHM)
6884 char *shm_name =
nullptr;
6886 if (__kmp_shm_available) {
6887 shm_name = __kmp_str_format(
"/%s", name);
6888 fd1 = shm_open(shm_name, O_RDONLY, 0600);
6890 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6891 if (data1 != MAP_FAILED) {
6892 value = __kmp_str_format(
"%s", data1);
6893 munmap(data1, SHM_SIZE);
6897 }
else if (__kmp_tmp_available) {
6898 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6900 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6901 if (data1 != MAP_FAILED) {
6902 value = __kmp_str_format(
"%s", data1);
6903 munmap(data1, SHM_SIZE);
6908 value = __kmp_env_get(name);
6911 value = __kmp_env_get(name);
6914 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6915 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6916 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6918#if defined(KMP_USE_SHM)
6919 if (__kmp_shm_available) {
6920 shm_unlink(shm_name);
6921 }
else if (__kmp_tmp_available) {
6922 unlink(temp_reg_status_file_name);
6924 __kmp_env_unset(name);
6927 __kmp_env_unset(name);
6931#if defined(KMP_USE_SHM)
6933 KMP_INTERNAL_FREE(shm_name);
6934 if (temp_reg_status_file_name)
6935 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6938 KMP_INTERNAL_FREE(__kmp_registration_str);
6939 KMP_INTERNAL_FREE(value);
6940 KMP_INTERNAL_FREE(name);
6942 __kmp_registration_flag = 0;
6943 __kmp_registration_str = NULL;
#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */
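// The CPUID family/model bits checked above distinguish the two supported
// Xeon Phi generations (mic2 vs. mic3); everything else is treated as
// non_mic and keeps the default barrier patterns selected later in
// __kmp_do_serial_initialize().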
#if KMP_HAVE_UMWAIT
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() is available in RHEL7 and SLES12. If the RTL is built on an
// earlier OS, fall back to this internal stub when the entry is not found.
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
6990static void __kmp_user_level_mwait_init() {
6995 if (__kmp_mic_type == mic3) {
6996 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6997 if ((res & 0x1) || __kmp_user_level_mwait) {
6998 __kmp_mwait_enabled = TRUE;
6999 if (__kmp_user_level_mwait) {
7000 KMP_INFORM(EnvMwaitWarn);
7003 __kmp_mwait_enabled = FALSE;
7006 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7007 "__kmp_mwait_enabled = %d\n",
7008 __kmp_mic_type, __kmp_mwait_enabled));
7012static void __kmp_do_serial_initialize(
void) {
7016 KA_TRACE(10, (
"__kmp_do_serial_initialize: enter\n"));
7018 KMP_DEBUG_ASSERT(
sizeof(kmp_int32) == 4);
7019 KMP_DEBUG_ASSERT(
sizeof(kmp_uint32) == 4);
7020 KMP_DEBUG_ASSERT(
sizeof(kmp_int64) == 8);
7021 KMP_DEBUG_ASSERT(
sizeof(kmp_uint64) == 8);
7022 KMP_DEBUG_ASSERT(
sizeof(kmp_intptr_t) ==
sizeof(
void *));
7032 __kmp_validate_locks();
7034#if ENABLE_LIBOMPTARGET
7036 __kmp_init_omptarget();
7040 __kmp_init_allocator();
7046 if (__kmp_need_register_serial)
7047 __kmp_register_library_startup();
7050 if (TCR_4(__kmp_global.g.g_done)) {
7051 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7054 __kmp_global.g.g_abort = 0;
7055 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7058#if KMP_USE_ADAPTIVE_LOCKS
7059#if KMP_DEBUG_ADAPTIVE_LOCKS
7060 __kmp_init_speculative_stats();
7063#if KMP_STATS_ENABLED
7066 __kmp_init_lock(&__kmp_global_lock);
7067 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7068 __kmp_init_lock(&__kmp_debug_lock);
7069 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7070 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7071 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7072 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7073 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7074 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7075 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7076 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7077 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7078 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7079 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7080 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7081 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7082 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7083 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7085 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7087 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7091 __kmp_runtime_initialize();
7093#if KMP_MIC_SUPPORTED
7094 __kmp_check_mic_type();
7101 __kmp_abort_delay = 0;
7105 __kmp_dflt_team_nth_ub = __kmp_xproc;
7106 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7107 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7109 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7110 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7112 __kmp_max_nth = __kmp_sys_max_nth;
7113 __kmp_cg_max_nth = __kmp_sys_max_nth;
7114 __kmp_teams_max_nth = __kmp_xproc;
7115 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7116 __kmp_teams_max_nth = __kmp_sys_max_nth;
7121 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7123 __kmp_monitor_wakeups =
7124 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7125 __kmp_bt_intervals =
7126 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7129 __kmp_library = library_throughput;
7131 __kmp_static = kmp_sch_static_balanced;
7138#if KMP_FAST_REDUCTION_BARRIER
7139#define kmp_reduction_barrier_gather_bb ((int)1)
7140#define kmp_reduction_barrier_release_bb ((int)1)
7141#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7142#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7144 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7145 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7146 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7147 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7148 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7149#if KMP_FAST_REDUCTION_BARRIER
7150 if (i == bs_reduction_barrier) {
7152 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7153 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7154 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7155 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7159#if KMP_FAST_REDUCTION_BARRIER
7160#undef kmp_reduction_barrier_release_pat
7161#undef kmp_reduction_barrier_gather_pat
7162#undef kmp_reduction_barrier_release_bb
7163#undef kmp_reduction_barrier_gather_bb
7165#if KMP_MIC_SUPPORTED
7166 if (__kmp_mic_type == mic2) {
7168 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7169 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7171 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7172 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7174#if KMP_FAST_REDUCTION_BARRIER
7175 if (__kmp_mic_type == mic2) {
7176 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7177 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7184 __kmp_env_checks = TRUE;
7186 __kmp_env_checks = FALSE;
7190 __kmp_foreign_tp = TRUE;
7192 __kmp_global.g.g_dynamic = FALSE;
7193 __kmp_global.g.g_dynamic_mode = dynamic_default;
7195 __kmp_init_nesting_mode();
7197 __kmp_env_initialize(NULL);
7199#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7200 __kmp_user_level_mwait_init();
7204 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7205 if (__kmp_str_match_true(val)) {
7206 kmp_str_buf_t buffer;
7207 __kmp_str_buf_init(&buffer);
7208 __kmp_i18n_dump_catalog(&buffer);
7209 __kmp_printf(
"%s", buffer.str);
7210 __kmp_str_buf_free(&buffer);
7212 __kmp_env_free(&val);
7215 __kmp_threads_capacity =
7216 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7218 __kmp_tp_capacity = __kmp_default_tp_capacity(
7219 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7224 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7225 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7226 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7227 __kmp_thread_pool = NULL;
7228 __kmp_thread_pool_insert_pt = NULL;
7229 __kmp_team_pool = NULL;
7236 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7238 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7239 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
7240 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7243 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7245 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7250 gtid = __kmp_register_root(TRUE);
7251 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7252 KMP_ASSERT(KMP_UBER_GTID(gtid));
7253 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7257 __kmp_common_initialize();
7261 __kmp_register_atfork();
7264#if !KMP_DYNAMIC_LIB || \
7265 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7270 int rc = atexit(__kmp_internal_end_atexit);
7272 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
7278#if KMP_HANDLE_SIGNALS
7284 __kmp_install_signals(FALSE);
7287 __kmp_install_signals(TRUE);
7292 __kmp_init_counter++;
7294 __kmp_init_serial = TRUE;
7296 if (__kmp_version) {
7297 __kmp_print_version_1();
7300 if (__kmp_settings) {
7304 if (__kmp_display_env || __kmp_display_env_verbose) {
7305 __kmp_env_print_2();
7314 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
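// __kmp_serial_initialize() is the double-checked wrapper around
// __kmp_do_serial_initialize(); __kmp_middle_initialize() and
// __kmp_parallel_initialize() below follow the same
// check / acquire __kmp_initz_lock / re-check / initialize / release pattern.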
7330static void __kmp_do_middle_initialize(
void) {
7332 int prev_dflt_team_nth;
7334 if (!__kmp_init_serial) {
7335 __kmp_do_serial_initialize();
7338 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7340 if (UNLIKELY(!__kmp_need_register_serial)) {
7343 __kmp_register_library_startup();
7348 prev_dflt_team_nth = __kmp_dflt_team_nth;
7350#if KMP_AFFINITY_SUPPORTED
7353 __kmp_affinity_initialize(__kmp_affinity);
7357 KMP_ASSERT(__kmp_xproc > 0);
7358 if (__kmp_avail_proc == 0) {
7359 __kmp_avail_proc = __kmp_xproc;
7365 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7366 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7371 if (__kmp_dflt_team_nth == 0) {
7372#ifdef KMP_DFLT_NTH_CORES
7374 __kmp_dflt_team_nth = __kmp_ncores;
7375 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7376 "__kmp_ncores (%d)\n",
7377 __kmp_dflt_team_nth));
7380 __kmp_dflt_team_nth = __kmp_avail_proc;
7381 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7382 "__kmp_avail_proc(%d)\n",
7383 __kmp_dflt_team_nth));
7387 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7388 __kmp_dflt_team_nth = KMP_MIN_NTH;
7390 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7391 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7394 if (__kmp_nesting_mode > 0)
7395 __kmp_set_nesting_mode_threads();
7399 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7401 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7406 for (i = 0; i < __kmp_threads_capacity; i++) {
7407 kmp_info_t *thread = __kmp_threads[i];
7410 if (thread->th.th_current_task->td_icvs.nproc != 0)
7413 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7418 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7419 __kmp_dflt_team_nth));
7421#ifdef KMP_ADJUST_BLOCKTIME
7423 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7424 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7425 if (__kmp_nth > __kmp_avail_proc) {
7426 __kmp_zero_bt = TRUE;
7432 TCW_SYNC_4(__kmp_init_middle, TRUE);
7434 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
7463 if (TCR_4(__kmp_global.g.g_done)) {
7466 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7467 __kmp_infinite_loop();
7473 if (!__kmp_init_middle) {
7474 __kmp_do_middle_initialize();
7476 __kmp_assign_root_init_mask();
7477 __kmp_resume_if_hard_paused();
7480 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7481 KMP_ASSERT(KMP_UBER_GTID(gtid));
7483#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7486 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7487 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7488 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7492#if KMP_HANDLE_SIGNALS
7494 __kmp_install_signals(TRUE);
7498 __kmp_suspend_initialize();
7500#if defined(USE_LOAD_BALANCE)
7501 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7502 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7505 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7506 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7510 if (__kmp_version) {
7511 __kmp_print_version_2();
7515 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7518 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7520 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before we initialize hidden helper
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
7533 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7534 if (TCR_4(__kmp_init_hidden_helper)) {
7535 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7539#if KMP_AFFINITY_SUPPORTED
7543 if (!__kmp_hh_affinity.flags.initialized)
7544 __kmp_affinity_initialize(__kmp_hh_affinity);
7548 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7552 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7555 __kmp_do_initialize_hidden_helper_threads();
7558 __kmp_hidden_helper_threads_initz_wait();
7561 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7563 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
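// Per-thread preamble run before a microtask is invoked: resets the thread's
// construct counter and dispatch buffers and, with consistency checks on,
// pushes the parallel region onto the consistency stack.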
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; /* reset doacross buffer counter */
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
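// Invoke the team's microtask (t_pkfn) on behalf of the calling thread,
// bracketed by the run_before/run_after helpers and by the OMPT, ITT and
// statistics bookkeeping below.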
int __kmp_invoke_task_func(int gtid) {
  int rc;
7603 int tid = __kmp_tid_from_gtid(gtid);
7604 kmp_info_t *this_thr = __kmp_threads[gtid];
7605 kmp_team_t *team = this_thr->th.th_team;
7607 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7609 if (__itt_stack_caller_create_ptr) {
7611 if (team->t.t_stack_id != NULL) {
7612 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7614 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7615 __kmp_itt_stack_callee_enter(
7616 (__itt_caller)team->t.t_parent->t.t_stack_id);
7620#if INCLUDE_SSC_MARKS
7621 SSC_MARK_INVOKING();
7626 void **exit_frame_p;
7627 ompt_data_t *my_task_data;
7628 ompt_data_t *my_parallel_data;
7631 if (ompt_enabled.enabled) {
7632 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7633 .ompt_task_info.frame.exit_frame.ptr);
7635 exit_frame_p = &dummy;
7639 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7640 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7641 if (ompt_enabled.ompt_callback_implicit_task) {
7642 ompt_team_size = team->t.t_nproc;
7643 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7644 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7645 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7646 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7650#if KMP_STATS_ENABLED
7652 if (previous_state == stats_state_e::TEAMS_REGION) {
7653 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7655 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7657 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif
7672#if KMP_STATS_ENABLED
7673 if (previous_state == stats_state_e::TEAMS_REGION) {
7674 KMP_SET_THREAD_STATE(previous_state);
7676 KMP_POP_PARTITIONED_TIMER();
7680 if (__itt_stack_caller_create_ptr) {
7682 if (team->t.t_stack_id != NULL) {
7683 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7685 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7686 __kmp_itt_stack_callee_leave(
7687 (__itt_caller)team->t.t_parent->t.t_stack_id);
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
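// Entry point run by the primary thread of each team of a teams construct:
// it becomes a new contention-group root, then forks and joins the nested
// parallel region that executes the wrapped teams microtask.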
void __kmp_teams_master(int gtid) {
7698 kmp_info_t *thr = __kmp_threads[gtid];
7699 kmp_team_t *team = thr->th.th_team;
7700 ident_t *loc = team->t.t_ident;
7701 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7702 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7703 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1); // join barrier skipped: workers wait in fork barrier
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7754 int tid = __kmp_tid_from_gtid(gtid);
7755 ompt_data_t *task_data =
7756 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7757 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7758 if (ompt_enabled.ompt_callback_implicit_task) {
7759 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7760 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7762 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7765 __kmp_teams_master(gtid);
7767 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
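// The following "push" routines record clause values (num_threads,
// thread_limit, num_teams, proc_bind) on the requesting thread so that the
// next fork can pick them up.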
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
7785static void __kmp_push_thread_limit(kmp_info_t *thr,
int num_teams,
7787 KMP_DEBUG_ASSERT(thr);
7789 if (!TCR_4(__kmp_init_middle))
7790 __kmp_middle_initialize();
7791 __kmp_assign_root_init_mask();
7792 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7793 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7795 if (num_threads == 0) {
7796 if (__kmp_teams_thread_limit > 0) {
7797 num_threads = __kmp_teams_thread_limit;
7799 num_threads = __kmp_avail_proc / num_teams;
7804 if (num_threads > __kmp_dflt_team_nth) {
7805 num_threads = __kmp_dflt_team_nth;
7807 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7808 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7810 if (num_teams * num_threads > __kmp_teams_max_nth) {
7811 num_threads = __kmp_teams_max_nth / num_teams;
7813 if (num_threads == 0) {
7817 if (num_threads < 0) {
7818 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7824 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7826 if (num_threads > __kmp_dflt_team_nth) {
7827 num_threads = __kmp_dflt_team_nth;
7829 if (num_teams * num_threads > __kmp_teams_max_nth) {
7830 int new_threads = __kmp_teams_max_nth / num_teams;
7831 if (new_threads == 0) {
7834 if (new_threads != num_threads) {
7835 if (!__kmp_reserve_warn) {
7836 __kmp_reserve_warn = 1;
7837 __kmp_msg(kmp_ms_warning,
7838 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7839 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7842 num_threads = new_threads;
  thr->th.th_teams_size.nth = num_threads;
}
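// Records the num_teams and thread_limit values of a teams construct; the
// thread-limit handling is shared with __kmp_push_num_teams_51 via
// __kmp_push_thread_limit above.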
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
7853 if (num_teams < 0) {
7856 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7860 if (num_teams == 0) {
7861 if (__kmp_nteams > 0) {
7862 num_teams = __kmp_nteams;
7867 if (num_teams > __kmp_teams_max_nth) {
7868 if (!__kmp_reserve_warn) {
7869 __kmp_reserve_warn = 1;
7870 __kmp_msg(kmp_ms_warning,
7871 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7872 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7874 num_teams = __kmp_teams_max_nth;
7878 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7880 __kmp_push_thread_limit(thr, num_teams, num_threads);
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);
7892 if (num_teams_lb > num_teams_ub) {
7893 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7894 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7899 if (num_teams_lb == 0 && num_teams_ub > 0)
7900 num_teams_lb = num_teams_ub;
7902 if (num_teams_lb == 0 && num_teams_ub == 0) {
7903 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7904 if (num_teams > __kmp_teams_max_nth) {
7905 if (!__kmp_reserve_warn) {
7906 __kmp_reserve_warn = 1;
7907 __kmp_msg(kmp_ms_warning,
7908 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7909 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7911 num_teams = __kmp_teams_max_nth;
7913 }
else if (num_teams_lb == num_teams_ub) {
7914 num_teams = num_teams_ub;
7916 if (num_threads <= 0) {
7917 if (num_teams_ub > __kmp_teams_max_nth) {
7918 num_teams = num_teams_lb;
7920 num_teams = num_teams_ub;
7923 num_teams = (num_threads > __kmp_teams_max_nth)
7925 : __kmp_teams_max_nth / num_threads;
7926 if (num_teams < num_teams_lb) {
7927 num_teams = num_teams_lb;
7928 }
else if (num_teams > num_teams_ub) {
7929 num_teams = num_teams_ub;
7935 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7937 __kmp_push_thread_limit(thr, num_teams, num_threads);
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
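// __kmp_internal_fork releases an already-assembled team of workers into the
// microtask via the fork barrier; __kmp_internal_join gathers them again at
// the join barrier once the parallel region is done.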
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
8018 if (ompt_enabled.enabled &&
8019 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8020 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8021 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8022 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8024 void *codeptr = NULL;
8025 if (KMP_MASTER_TID(ds_tid) &&
8026 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8027 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8028 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8030 if (ompt_enabled.ompt_callback_sync_region_wait) {
8031 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8032 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8035 if (ompt_enabled.ompt_callback_sync_region) {
8036 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8037 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8041 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8042 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8043 ompt_scope_end, NULL, task_data, 0, ds_tid,
8044 ompt_task_implicit);
8050 KMP_ASSERT(this_thr->th.th_team == team);
8055#ifdef USE_LOAD_BALANCE
8059static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8062 kmp_team_t *hot_team;
8064 if (root->r.r_active) {
8067 hot_team = root->r.r_hot_team;
8068 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8069 return hot_team->t.t_nproc - 1;
8074 for (i = 1; i < hot_team->t.t_nproc; i++) {
8075 if (hot_team->t.t_threads[i]->th.th_active) {
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this root, and the currently executing thread (the future primary thread)
  // are available to add to the new team, but they still contribute to the
  // system load and must be accounted for.
8108 pool_active = __kmp_thread_pool_active_nth;
8109 hot_team_active = __kmp_active_hot_team_nproc(root);
8110 team_curr_active = pool_active + hot_team_active + 1;
8113 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8114 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d "
8115 "hot team active = %d\n",
8116 system_active, pool_active, hot_team_active));
8118 if (system_active < 0) {
8122 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8123 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
8126 retval = __kmp_avail_proc - __kmp_nth +
8127 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8128 if (retval > set_nproc) {
8131 if (retval < KMP_MIN_NTH) {
8132 retval = KMP_MIN_NTH;
8135 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8143 if (system_active < team_curr_active) {
8144 system_active = team_curr_active;
8146 retval = __kmp_avail_proc - system_active + team_curr_active;
8147 if (retval > set_nproc) {
8150 if (retval < KMP_MIN_NTH) {
8151 retval = KMP_MIN_NTH;
  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc()

#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));
  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();
8192 for (f = 0; f < __kmp_threads_capacity; f++) {
8193 if (__kmp_root[f] != NULL) {
8194 __kmp_free(__kmp_root[f]);
8195 __kmp_root[f] = NULL;
8198 __kmp_free(__kmp_threads);
8201 __kmp_threads = NULL;
8203 __kmp_threads_capacity = 0;
8206 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8208 kmp_old_threads_list_t *next = ptr->next;
8209 __kmp_free(ptr->threads);
8214#if KMP_USE_DYNAMIC_LOCK
8215 __kmp_cleanup_indirect_user_locks();
8217 __kmp_cleanup_user_locks();
8221 __kmp_free(ompd_env_block);
8222 ompd_env_block = NULL;
8223 ompd_env_block_size = 0;
8227#if KMP_AFFINITY_SUPPORTED
8228 KMP_INTERNAL_FREE(CCAST(
char *, __kmp_cpuinfo_file));
8229 __kmp_cpuinfo_file = NULL;
8232#if KMP_USE_ADAPTIVE_LOCKS
8233#if KMP_DEBUG_ADAPTIVE_LOCKS
8234 __kmp_print_speculative_stats();
8237 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8238 __kmp_nested_nth.nth = NULL;
8239 __kmp_nested_nth.size = 0;
8240 __kmp_nested_nth.used = 0;
8241 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8242 __kmp_nested_proc_bind.bind_types = NULL;
8243 __kmp_nested_proc_bind.size = 0;
8244 __kmp_nested_proc_bind.used = 0;
8245 if (__kmp_affinity_format) {
8246 KMP_INTERNAL_FREE(__kmp_affinity_format);
8247 __kmp_affinity_format = NULL;
8250 __kmp_i18n_catclose();
8252#if KMP_USE_HIER_SCHED
8253 __kmp_hier_scheds.deallocate();
8256#if KMP_STATS_ENABLED
  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
8354void __kmp_aux_set_stacksize(
size_t arg) {
8355 if (!__kmp_init_serial)
8356 __kmp_serial_initialize();
8359 if (arg & (0x1000 - 1)) {
8360 arg &= ~(0x1000 - 1);
8365 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8368 if (!TCR_4(__kmp_init_parallel)) {
8371 if (value < __kmp_sys_min_stksize)
8372 value = __kmp_sys_min_stksize;
8373 else if (value > KMP_MAX_STKSIZE)
8374 value = KMP_MAX_STKSIZE;
8376 __kmp_stksize = value;
8378 __kmp_env_stksize = TRUE;
8381 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
8408static kmp_team_t *__kmp_aux_get_team_info(
int &teams_serialized) {
8409 kmp_info_t *thr = __kmp_entry_thread();
8410 teams_serialized = 0;
8411 if (thr->th.th_teams_microtask) {
8412 kmp_team_t *team = thr->th.th_team;
8413 int tlevel = thr->th.th_teams_level;
8414 int ii = team->t.t_level;
8415 teams_serialized = team->t.t_serialized;
8416 int level = tlevel + 1;
8417 KMP_DEBUG_ASSERT(ii >= tlevel);
8418 while (ii > level) {
8419 for (teams_serialized = team->t.t_serialized;
8420 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8422 if (team->t.t_serialized && (!teams_serialized)) {
8423 team = team->t.t_parent;
8427 team = team->t.t_parent;
8436int __kmp_aux_get_team_num() {
8438 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8440 if (serialized > 1) {
8443 return team->t.t_master_tid;
8449int __kmp_aux_get_num_teams() {
8451 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8453 if (serialized > 1) {
8456 return team->t.t_parent->t.t_nproc;
// Fields accepted by OMP_AFFINITY_FORMAT / omp_set_affinity_format()
typedef struct kmp_affinity_format_field_t {
  char short_name; // single-character field name, e.g. 'n'
  const char *long_name; // long field name, e.g. "thread_num"
  char field_format; // printf conversion used for the value ('d' or 's')
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
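// Illustrative example only (not necessarily the runtime's built-in default):
// an affinity format such as "OMP: pid %P tid %i thread %n bound to {%A}"
// combines the process_id, native_thread_id, thread_num and thread_affinity
// fields above; long names may also be used, e.g. "%{thread_num}".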
// Parse a single %-field of the affinity format string pointed to by *ptr and
// render its value into field_buffer.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
8556 width_left = width_right = NULL;
8557 if (**ptr >=
'0' && **ptr <=
'9') {
8565 format[format_index++] =
'%';
8567 format[format_index++] =
'-';
8569 format[format_index++] =
'0';
8570 if (width_left && width_right) {
8574 while (i < 8 && width_left < width_right) {
8575 format[format_index++] = *width_left;
8583 found_valid_name =
false;
8584 parse_long_name = (**ptr ==
'{');
8585 if (parse_long_name)
8587 for (
size_t i = 0; i <
sizeof(__kmp_affinity_format_table) /
8588 sizeof(__kmp_affinity_format_table[0]);
8590 char short_name = __kmp_affinity_format_table[i].short_name;
8591 const char *long_name = __kmp_affinity_format_table[i].long_name;
8592 char field_format = __kmp_affinity_format_table[i].field_format;
8593 if (parse_long_name) {
8594 size_t length = KMP_STRLEN(long_name);
8595 if (strncmp(*ptr, long_name, length) == 0) {
8596 found_valid_name =
true;
8599 }
else if (**ptr == short_name) {
8600 found_valid_name =
true;
8603 if (found_valid_name) {
8604 format[format_index++] = field_format;
8605 format[format_index++] =
'\0';
8606 absolute_short_name = short_name;
8610 if (parse_long_name) {
8612 absolute_short_name = 0;
  // Fill the format buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // No information for this field type: print "undefined" per the spec
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip over the unrecognized field
    if (parse_long_name) {
      // skip to (and past) the closing '}' of the long name
      while (**ptr != '\0' && **ptr != '}')
        (*ptr)++;
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
    break;
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
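// Expand an affinity format string: literal characters are copied through and
// each %-field is rendered by __kmp_aux_capture_affinity_field; the return
// value is the number of characters the expanded string requires.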
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or the empty string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the expanded affinity format for the given thread on kmp_out
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
8738void __kmp_aux_set_blocktime(
int arg, kmp_info_t *thread,
int tid) {
8739 int blocktime = arg;
8745 __kmp_save_internal_controls(thread);
8748 if (blocktime < KMP_MIN_BLOCKTIME)
8749 blocktime = KMP_MIN_BLOCKTIME;
8750 else if (blocktime > KMP_MAX_BLOCKTIME)
8751 blocktime = KMP_MAX_BLOCKTIME;
8753 set__blocktime_team(thread->th.th_team, tid, blocktime);
8754 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8758 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8760 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8761 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8767 set__bt_set_team(thread->th.th_team, tid, bt_set);
8768 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8770 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8771 "bt_intervals=%d, monitor_updates=%d\n",
8772 __kmp_gtid_from_tid(tid, thread->th.th_team),
8773 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8774 __kmp_monitor_wakeups));
8776 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8777 __kmp_gtid_from_tid(tid, thread->th.th_team),
8778 thread->th.th_team->t.t_id, tid, blocktime));
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
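// Reduction-method selection: the value returned below packs the chosen block
// type (critical, atomic, tree or empty) together with the barrier variant so
// the __kmpc_reduce* entry points can decode it later.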
/* ------------------------------------------------------------------------ */
/* Internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct as the reduction method. If fast termination
  // of the reduction is requested, the method is chosen based on the size of
  // the variables and the atomic/tree methods available on this platform/OS.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8834#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8835 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8836 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8838#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8839 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \
8840 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8842 int teamsize_cutoff = 4;
8844#if KMP_MIC_SUPPORTED
8845 if (__kmp_mic_type != non_mic) {
8846 teamsize_cutoff = 8;
8849 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8850 if (tree_available) {
8851 if (team_size <= teamsize_cutoff) {
8852 if (atomic_available) {
8853 retval = atomic_reduce_block;
8856 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8858 }
else if (atomic_available) {
8859 retval = atomic_reduce_block;
8862#error "Unknown or unsupported OS"
8867#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8868 KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
8870#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8871 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \
8872 KMP_OS_WASI || KMP_OS_AIX
8876 if (atomic_available) {
8877 if (num_vars <= 2) {
8878 retval = atomic_reduce_block;
8884 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8885 if (atomic_available && (num_vars <= 3)) {
8886 retval = atomic_reduce_block;
8887 }
else if (tree_available) {
8888 if ((reduce_size > (9 *
sizeof(kmp_real64))) &&
8889 (reduce_size < (2000 *
sizeof(kmp_real64)))) {
8890 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8895#error "Unknown or unsupported OS"
8899#error "Unknown or unsupported architecture"
8907 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8910 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8912 int atomic_available, tree_available;
8914 switch ((forced_retval = __kmp_force_reduction_method)) {
8915 case critical_reduce_block:
8919 case atomic_reduce_block:
8920 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8921 if (!atomic_available) {
8922 KMP_WARNING(RedMethodNotSupported,
"atomic");
8923 forced_retval = critical_reduce_block;
8927 case tree_reduce_block:
8928 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8929 if (!tree_available) {
8930 KMP_WARNING(RedMethodNotSupported,
"tree");
8931 forced_retval = critical_reduce_block;
8933#if KMP_FAST_REDUCTION_BARRIER
8934 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8943 retval = forced_retval;
8946 KA_TRACE(10, (
"reduction method selected=%08x\n", retval));
8948#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8949#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
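// Pausing the resource: a soft pause only flips __kmp_pause_status, while a
// hard pause also terminates the runtime's threads via
// __kmp_internal_end_thread(-1) so it must reinitialize afterwards.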
8960void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}
8970void __kmp_resume_if_soft_paused() {
8971 if (__kmp_pause_status == kmp_soft_paused) {
8972 __kmp_pause_status = kmp_not_paused;
8974 for (
int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8975 kmp_info_t *thread = __kmp_threads[gtid];
8977 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8979 if (fl.is_sleeping())
8981 else if (__kmp_try_suspend_mx(thread)) {
8982 __kmp_unlock_suspend_mx(thread);
8985 if (fl.is_sleeping()) {
8988 }
else if (__kmp_try_suspend_mx(thread)) {
8989 __kmp_unlock_suspend_mx(thread);
9001int __kmp_pause_resource(kmp_pause_status_t level) {
9002 if (level == kmp_not_paused) {
9003 if (__kmp_pause_status == kmp_not_paused) {
9007 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9008 __kmp_pause_status == kmp_hard_paused);
9009 __kmp_pause_status = kmp_not_paused;
9012 }
else if (level == kmp_soft_paused) {
9013 if (__kmp_pause_status != kmp_not_paused) {
9020 }
else if (level == kmp_hard_paused) {
9021 if (__kmp_pause_status != kmp_not_paused) {
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
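// Support for resizing the distributed (dist) barrier when the hot team
// changes size: workers are released from the old barrier, th_used_in_team is
// transitioned, and the barrier is resized before threads rejoin.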
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
9047 kmp_info_t **other_threads = team->t.t_threads;
9051 for (
int f = 1; f < old_nthreads; ++f) {
9052 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9054 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9060 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9061 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9065 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9067 team->t.t_threads[f]->th.th_used_in_team.store(2);
9068 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9071 team->t.b->go_release();
9077 int count = old_nthreads - 1;
9079 count = old_nthreads - 1;
9080 for (
int f = 1; f < old_nthreads; ++f) {
9081 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9082 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9083 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9084 void *, other_threads[f]->th.th_sleep_loc);
9085 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9088 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9094 team->t.b->update_num_threads(new_nthreads);
9095 team->t.b->go_reset();
9098void __kmp_add_threads_to_team(kmp_team_t *team,
int new_nthreads) {
9100 KMP_DEBUG_ASSERT(team);
9106 for (
int f = 1; f < new_nthreads; ++f) {
9107 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9108 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9110 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9111 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9112 (kmp_flag_32<false, false> *)NULL);
9118 int count = new_nthreads - 1;
9120 count = new_nthreads - 1;
9121 for (
int f = 1; f < new_nthreads; ++f) {
9122 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9130kmp_info_t **__kmp_hidden_helper_threads;
9131kmp_info_t *__kmp_hidden_helper_main_thread;
9132std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9134kmp_int32 __kmp_hidden_helper_threads_num = 8;
9135kmp_int32 __kmp_enable_hidden_helper = TRUE;
9137kmp_int32 __kmp_hidden_helper_threads_num = 0;
9138kmp_int32 __kmp_enable_hidden_helper = FALSE;
9142std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9144void __kmp_hidden_helper_wrapper_fn(
int *gtid,
int *, ...) {
9149 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9150 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9151 __kmp_hidden_helper_threads_num)
9157 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9158 __kmp_hidden_helper_initz_release();
9159 __kmp_hidden_helper_main_thread_wait();
9161 for (
int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9162 __kmp_hidden_helper_worker_thread_signal();
9168void __kmp_hidden_helper_threads_initz_routine() {
9170 const int gtid = __kmp_register_root(TRUE);
9171 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9172 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9173 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9174 __kmp_hidden_helper_threads_num;
9176 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9181 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9183 __kmp_hidden_helper_threads_deinitz_release();
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
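// Derive per-level thread counts for KMP_NESTING_MODE from the detected
// topology (or from __kmp_avail_proc when no topology is available) and store
// them in __kmp_nested_nth.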
9217void __kmp_set_nesting_mode_threads() {
9218 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9220 if (__kmp_nesting_mode == 1)
9221 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9222 else if (__kmp_nesting_mode > 1)
9223 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9225 if (__kmp_topology) {
9227 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9228 loc < __kmp_nesting_mode_nlevels;
9229 loc++, hw_level++) {
9230 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9231 if (__kmp_nesting_nth_level[loc] == 1)
9235 if (__kmp_nesting_mode > 1 && loc > 1) {
9236 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9237 int num_cores = __kmp_topology->get_count(core_level);
9238 int upper_levels = 1;
9239 for (
int level = 0; level < loc - 1; ++level)
9240 upper_levels *= __kmp_nesting_nth_level[level];
9241 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9242 __kmp_nesting_nth_level[loc - 1] =
9243 num_cores / __kmp_nesting_nth_level[loc - 2];
9245 __kmp_nesting_mode_nlevels = loc;
9246 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9248 if (__kmp_avail_proc >= 4) {
9249 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9250 __kmp_nesting_nth_level[1] = 2;
9251 __kmp_nesting_mode_nlevels = 2;
9253 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9254 __kmp_nesting_mode_nlevels = 1;
9256 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9258 for (
int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9259 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9261 set__nproc(thread, __kmp_nesting_nth_level[0]);
9262 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9263 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9264 if (get__max_active_levels(thread) > 1) {
9266 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9268 if (__kmp_nesting_mode == 1)
9269 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}

// Empty stubs exported when the corresponding feature is compiled out
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif

#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif

#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif