#include "ompt-specific.h"

#define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));
    __kmp_internal_end_thread(-1);

#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
  kmp_int32 gtid = __kmp_entry_gtid();
  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
  if (__kmp_par_range == 0) {
  semi2 = strchr(semi2, ';');
  semi2 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
    if ((*name == '/') || (*name == ';')) {
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    return __kmp_par_range < 0;

  return __kmp_entry_thread()->th.th_root->r.r_active;
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
  __kmp_join_call(loc, gtid

  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
    KMP_POP_PARTITIONED_TIMER();
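/* Illustrative sketch (not part of the original source): roughly how a
   compiler front end lowers "#pragma omp parallel" onto __kmpc_fork_call.
   The outlined-body name, the shared variable, and `loc` (the
   compiler-generated ident_t) are hypothetical; the outlined function is
   assumed to use the conventional (kmp_int32 *gtid, kmp_int32 *btid, ...)
   signature.

     void outlined_body(kmp_int32 *gtid, kmp_int32 *btid, int *shared_x) {
       // parallel-region body, executed by every thread of the team
       (*shared_x)++;
     }

     void user_func(void) {
       int x = 0;
       // argc == 1: one shared argument is forwarded through the varargs
       __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_body, &x);
     }
*/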
                          kmp_int32 cond, void *args) {
  int gtid = __kmp_entry_gtid();

  void *exit_frame_ptr;

      __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
      __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);

                             kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_info_t *thread = __kmp_threads[global_tid];
  if (thread_limit > 0)
    thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);

  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(gtid);

  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master,
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
    KMP_POP_PARTITIONED_TIMER();

int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
  __kmp_assert_valid_gtid(global_tid);
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmp_serialized_parallel(loc, global_tid);
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;

  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;

  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);

  if (serial_team->t.t_serialized > 1) {
    __kmp_pop_task_team_node(this_thr, serial_team);

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);

    __kmp_pop_current_task_from_thread(this_thr);
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master = serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
                       serial_team->t.t_primary_task_state == 1);
      this_thr->th.th_task_state =
          (kmp_uint8)serial_team->t.t_primary_task_state;
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));

#if KMP_AFFINITY_SUPPORTED
    if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
      __kmp_reset_root_init_mask(global_tid);

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
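/* Illustrative sketch (not in the original source): an explicit
   "#pragma omp barrier" is typically lowered to a single call of the form

     __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));

   where `loc` stands for the compiler-generated ident_t describing the
   construct's source location. */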
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
      __kmp_check_sync(global_tid, ct_master, loc, NULL);

  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
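/* Illustrative sketch (not in the original source): the master construct is
   lowered to a guarded region, and __kmpc_end_master is executed only by the
   thread for which __kmpc_master returned 1:

     if (__kmpc_master(&loc, gtid)) {
       // body of #pragma omp master
       __kmpc_end_master(&loc, gtid);
     }
*/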
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);

  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_itt_ordered_start(gtid);

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
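/* Illustrative sketch (not in the original source): inside a loop chunk the
   compiler brackets the body of "#pragma omp ordered" with these two entry
   points, so iterations are released in sequence order:

     __kmpc_ordered(&loc, gtid);
     // body of the ordered region
     __kmpc_end_ordered(&loc, gtid);
*/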
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
  __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
    __kmp_itt_critical_destroyed(ilk->lock);
  KMP_DEBUG_ASSERT(*lck != NULL);
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD_SPIN(spins); \
        __kmp_spin_backoff(&backoff); \
               KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
               !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    KMP_FSYNC_ACQUIRED(l); \

#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \

#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
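/* Minimal usage sketch (not in the original source): the inlined test-and-set
   fast path pairs KMP_ACQUIRE_TAS_LOCK with KMP_RELEASE_TAS_LOCK on the same
   lock word, as the KMP_USE_INLINED_TAS branches later in this file do;
   `user_lock` and `gtid` here are placeholders for the caller's lock and
   global thread id:

     KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);  // spins with backoff until owned
     // ... protected work ...
     KMP_RELEASE_TAS_LOCK(user_lock, gtid);  // single release store
*/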
#include <sys/syscall.h>

#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         KMP_LOCK_BUSY(1, futex))) { \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
    KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    KMP_YIELD_OVERSUB(); \
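/* Reading of the futex macros above (an explanatory note, not in the original
   source): the owner is encoded as (gtid + 1) << 1, and bit 0 of the poll
   word marks "waiters present". A contender sets that bit before sleeping in
   FUTEX_WAIT, and the releasing thread issues FUTEX_WAKE only when that bit
   was set in the value it exchanged back to KMP_LOCK_FREE(futex). */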
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
      __kmp_itt_critical_destroyed(lck);
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
                      kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#define KMP_TSX_LOCK(seq) lockseq_##seq
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#define KMP_CPUINFO_RTM 0

  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))

  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  switch (KMP_EXTRACT_D_TAG(user_lock)) {
    return kmp_mutex_impl_queuing;
    return kmp_mutex_impl_spin;
  case locktag_rtm_spin:
    return kmp_mutex_impl_speculative;
    return kmp_mutex_impl_none;
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  switch (ilock->type) {
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
  case locktag_nested_futex:
  case locktag_ticket:
  case locktag_queuing:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
    return kmp_mutex_impl_none;

static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
    return kmp_mutex_impl_spin;
    return kmp_mutex_impl_queuing;
  case lk_rtm_queuing:
    return kmp_mutex_impl_speculative;
    return kmp_mutex_impl_none;
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);

  if (KMP_IS_D_LOCK(lockseq)) {
    KMP_COMPARE_AND_STORE_ACQ32(
        (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
        KMP_GET_D_TAG(lockseq));
    __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));

  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);

    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);

  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
                          kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    __kmp_itt_critical_releasing(lck);
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);

    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    __kmp_itt_critical_releasing(lck);
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  return (status != 0) ? 0 : 1;

  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_master, loc);
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

    KMP_PUSH_PARTITIONED_TIMER(OMP_single);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      ompt_work_type = ompt_work_loop;
      ompt_work_type = ompt_work_sections;
      ompt_work_type = ompt_work_distribute;
      KMP_DEBUG_ASSERT(ompt_work_type);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());

void ompc_set_dynamic(int flag) {
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);

void ompc_set_nested(int flag) {
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);

size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,

void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  __kmp_aux_display_affinity(gtid, format);

size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  __kmp_str_buf_free(&capture_buf);
  return num_required;
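/* Usage sketch (not in the original source): the return value is the number
   of characters the expansion of the caller's format string needs, computed
   independently of buf_size, so a caller can detect truncation and retry with
   a larger buffer; `format` here is a placeholder for that string:

     char small[32];
     size_t need = ompc_capture_affinity(small, sizeof(small), format);
     if (need >= sizeof(small)) {
       // output was truncated; a buffer of need + 1 bytes should hold it all
     }
*/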
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);

void kmpc_set_blocktime(int arg) {
  int gtid, tid, bt = arg;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_convert_blocktime(&bt);
  __kmp_aux_set_blocktime(bt, thread, tid);

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));

void kmpc_set_disp_num_buffers(int arg) {
  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;
int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
                        void *cpy_data, void (*cpy_func)(void *, void *),
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

    (*cpy_func)(cpy_data, *data_ptr);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
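/* Illustrative sketch (not in the original source), assuming the usual
   __kmpc_copyprivate(loc, gtid, cpy_size, cpy_data, cpy_func, didit)
   signature: for "#pragma omp single copyprivate(x)" the compiler passes each
   thread's own &x plus a small copy helper, and didit is 1 only on the thread
   that executed the single block. After the barrier every other thread
   receives the broadcast value through the helper:

     static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
     ...
     __kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_x, didit);
*/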
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                     kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);

static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
      seq == lockseq_rtm_spin || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
    seq = lockseq_nested_tas;
    seq = lockseq_nested_futex;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    seq = lockseq_nested_drdpa;
    seq = lockseq_nested_queuing;
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    lck = (kmp_user_lock_p)user_lock;
  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring(

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  // Cannot use the serialized path here since the call is not block-structured.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path for an inlined test-and-set lock
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  // Cannot use the serialized path here since the call is not block-structured.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path for an inlined test-and-set lock
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the nestable lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}

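/* Usage sketch (not part of the runtime, added for illustration): the
   __kmpc_*_lock / __kmpc_*_nest_lock entry points above back the user-level
   OpenMP lock API; in this runtime the omp_*_lock wrappers ultimately reach
   these functions, though the exact call chain is an implementation detail
   and stated here only as an assumption. A minimal, self-contained user
   program exercising the same operations (function name `main` and the lock
   usage are purely illustrative):

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       omp_lock_t l;
       omp_init_lock(&l);
     #pragma omp parallel num_threads(4)
       {
         if (omp_test_lock(&l)) {        // non-blocking attempt
           printf("T%d got the lock\n", omp_get_thread_num());
           omp_unset_lock(&l);
         }
         omp_set_lock(&l);               // blocking acquire
         omp_unset_lock(&l);
       }
       omp_destroy_lock(&l);
       return 0;
     }
*/
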
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // This lock is used only for internal purposes inside the reduce block.
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if the lock object is already initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Direct locks are accessed in place; indirect locks go through the pointer
  // stored in the critical name (the lock table is not used here).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If the lock fits into the critical name it is used in place; otherwise a
  // pointer to a separately allocated lock is used.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

// Used at the end of a critical-section reduce block.
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If the lock did not fit into the critical name, a pointer to the
  // separately allocated lock was stored there instead.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}

static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside a teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is a reduction at the teams construct level.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // only the team primary
      // Swap teams temporarily for the duration of the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  __kmp_type_convert(task_state, &(th->th.th_task_state));
}

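/* Context sketch (an assumption stated for illustration, not asserted by the
   source): the two helpers above appear to let a reduction executed directly
   at the teams level be performed against the parent (league) team by
   temporarily swapping the primary thread's team pointers and restoring them
   afterwards. User code that can reach this path looks roughly like the
   following (function name `league_sum` is illustrative):

     #include <omp.h>
     int league_sum(void) {
       int s = 0;
     #pragma omp teams num_teams(4) reduction(+ : s)
       s += 1;      // one contribution per team, combined across the league
       return s;
     }
*/
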
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_vars, size_t reduce_size,
                               void *reduce_data,
                               void (*reduce_func)(void *lhs_data,
                                                   void *rhs_data),
                               kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // A reduction clause cannot be used as a stand-alone directive, but make
  // sure the runtime is initialized anyway.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // The chosen method is kept in a thread-specific variable so that
  // __kmpc_end_reduce_nowait() can reuse it.
  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // team size == 1: no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads pop here because __kmpc_end_reduce_nowait() is not called
    // for the atomic method
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the primary thread pop here
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10,
           ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;
  } else if (packed_reduction_method == empty_reduce_block) {
    // team size == 1: no synchronization is required
    OMPT_REDUCTION_END;
  } else if (packed_reduction_method == atomic_reduce_block) {
    // code generation does not call this function for the atomic method
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
    // only the primary thread gets here
  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // Make sure the runtime is initialized.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // team size == 1: no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the primary thread pop here
    // (only the primary thread will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}

void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // This is a terminating barrier on the construct when NOWAIT is not
  // specified, so it should be visible to profiling tools.
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    OMPT_REDUCTION_END;

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_END;

    // team size == 1: no combining is required, only the terminating barrier

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread executes here; it releases all other workers
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

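/* Usage sketch (not part of the runtime, added for illustration): the
   __kmpc_reduce* / __kmpc_end_reduce* entry points are what the compiler
   emits for a reduction clause; __kmp_determine_reduction_method then picks
   a critical-section, atomic, or tree (barrier-based) combining strategy.
   A minimal user-level loop that exercises this path, assuming a conforming
   OpenMP compiler (function name `dot` is illustrative):

     #include <omp.h>
     double dot(const double *a, const double *b, int n) {
       double s = 0.0;
     #pragma omp parallel for reduction(+ : s)
       for (int i = 0; i < n; ++i)
         s += a[i] * b[i];     // per-thread partials combined by the runtime
       return s;
     }

   Whether the nowait or the blocking variant is called depends on the
   construct; the return value of __kmpc_reduce* (1 = caller combines and
   calls the matching end function, 2 = use atomics, 0 = nothing to do)
   drives the compiler-generated reduction epilogue. */
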
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;
  gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
}

kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;
  gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
}

void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // shared buffer index for this loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer.
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is the number of dimensions
  // Also save the address of num_done so it can be accessed later without
  // knowing the buffer index.
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // range of a dimension other than dims[0]
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count, starting with the range of dims[0].
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is still occupied by a previous loop.
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free.
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, or the allocated pointer.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size =
        (size_t)trace_count / 8 + 8; // in bytes, one bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) { // initialization still in progress
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // private copy so the shared buffer is not
  // touched on every iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number and check out-of-bounds cases
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  SimpleVLA<ompt_dependence_t> deps(num_dims);
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // negative increment
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number (same as in "wait", but without
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  SimpleVLA<ompt_dependence_t> deps(num_dims);
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // negative increment
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread; free the shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free the buffer for future re-use
  }
  // free private resources (the buffer index is kept forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}

void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}

void *omp_aligned_alloc(size_t align, size_t size,
                        omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}

void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
                         omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}

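/* Usage sketch (not part of the runtime, added for illustration): these are
   the OpenMP 5.x memory-management API entry points; they forward to the
   internal allocator with the calling thread's gtid. A minimal user-level
   example (function name `demo` is illustrative):

     #include <omp.h>
     #include <string.h>

     void demo(size_t n) {
       double *a = (double *)omp_alloc(n * sizeof(double),
                                       omp_default_mem_alloc);
       double *b = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                               omp_default_mem_alloc);
       memset(a, 0, n * sizeof(double));
       a = (double *)omp_realloc(a, 2 * n * sizeof(double),
                                 omp_default_mem_alloc, omp_default_mem_alloc);
       omp_free(a, omp_default_mem_alloc);
       omp_free(b, omp_default_mem_alloc);
     }
*/
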
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // cannot pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}

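/* Usage sketch (not part of the runtime, added for illustration):
   __kmpc_pause_resource backs the OpenMP 5.0 omp_pause_resource* API, which
   asks the runtime to release resources such as idle threads. Illustrative
   user call (function name `quiesce` is illustrative):

     #include <omp.h>
     void quiesce(void) {
       // soft pause: resources may be released, the runtime remains usable
       omp_pause_resource_all(omp_pause_soft);
     }
*/
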
void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT

  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }

  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);

  __kmp_str_free(&src_loc);
}

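/* Usage sketch (not part of the runtime, added for illustration):
   __kmpc_error implements the execution-time half of the OpenMP 5.1 error
   directive. A directive such as

     #pragma omp error at(execution) severity(warning) message("check input")

   is lowered by the compiler to a call here with severity_warning and the
   message string, which is then reported through KMP_WARNING/KMP_FATAL and,
   if a tool registered ompt_callback_error, through OMPT as well. */
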
// Mark the beginning of a scope directive.
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

// Mark the end of a scope directive.
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

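/* Usage sketch (not part of the runtime, added for illustration):
   __kmpc_scope/__kmpc_end_scope only bracket the OpenMP 5.1 scope construct
   for OMPT tools (ompt_work_scope begin/end); there is no other runtime work.
   Illustrative user code, assuming a compiler that supports the construct
   (function name `f` is illustrative):

     #include <omp.h>
     void f(void) {
       int s = 0;
     #pragma omp parallel
       {
     #pragma omp scope reduction(+ : s)   // bracketed by the calls above
         s += 1;
       }
     }
*/
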
#ifdef KMP_USE_VERSION_SYMBOLS
// Have to undef these from omp.h so they are not translated into their ompc_*
// counterparts in the KMP_VERSION_OMPC_SYMBOL macros below.
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                        "OMP_5.0");
#endif // KMP_USE_VERSION_SYMBOLS