LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
/*!
 * The flag_type describes the storage used for the flag.
 * Restored enumerators (lost in extraction): order matters, since
 * flag_traits<kmp_uint32>::t == flag32 and flag_traits<kmp_uint64>::t ==
 * flag64 are used as type tags stored in flag_properties::type.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};
44 
// Packed descriptor stored inside every flag object: 16 bits holding the
// flag_type tag plus 16 reserved bits. The tag is read through a
// type-punned pointer in __kmp_null_resume_wrapper (the real flag class is
// unknown there), so this layout must stay identical across flag classes.
struct flag_properties {
  unsigned int type : 16; // a flag_type value (flag32/flag64/flag_oncore)
  unsigned int reserved : 16; // unused padding, kept for a fixed 32-bit size
};
49 
53 template <typename P> class kmp_flag_native {
54  volatile P *loc;
55  flag_properties t;
56 
57 public:
58  typedef P flag_t;
59  kmp_flag_native(volatile P *p, flag_type ft)
60  : loc(p), t({(short unsigned int)ft, 0U}) {}
61  volatile P *get() { return loc; }
62  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
63  void set(volatile P *new_loc) { loc = new_loc; }
64  flag_type get_type() { return (flag_type)(t.type); }
65  P load() { return *loc; }
66  void store(P val) { *loc = val; }
67 };
68 
72 template <typename P> class kmp_flag {
73  std::atomic<P>
74  *loc;
76  flag_properties t;
77 public:
78  typedef P flag_t;
79  kmp_flag(std::atomic<P> *p, flag_type ft)
80  : loc(p), t({(short unsigned int)ft, 0U}) {}
84  std::atomic<P> *get() { return loc; }
88  void *get_void_p() { return RCAST(void *, loc); }
92  void set(std::atomic<P> *new_loc) { loc = new_loc; }
96  flag_type get_type() { return (flag_type)(t.type); }
100  P load() { return loc->load(std::memory_order_acquire); }
104  void store(P val) { loc->store(val, std::memory_order_release); }
105  // Derived classes must provide the following:
106  /*
107  kmp_info_t * get_waiter(kmp_uint32 i);
108  kmp_uint32 get_num_waiters();
109  bool done_check();
110  bool done_check_val(P old_loc);
111  bool notdone_check();
112  P internal_release();
113  void suspend(int th_gtid);
114  void mwait(int th_gtid);
115  void resume(int th_gtid);
116  P set_sleeping();
117  P unset_sleeping();
118  bool is_sleeping();
119  bool is_any_sleeping();
120  bool is_sleeping_val(P old_loc);
121  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
122  int *thread_finished
123  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
124  is_constrained);
125  */
126 };
127 
128 #if OMPT_SUPPORT
// Fire the OMPT "end" callbacks (sync-region-wait, sync-region and, for
// non-master workers, implicit-task) when a thread leaves an implicit
// barrier wait, then transition its OMPT state: workers go to idle, the
// master goes to overhead. No-op unless the thread was in
// ompt_state_wait_barrier_implicit.
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    // Wait-end must be reported before region-end (scope_end ordering).
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        // A league (teams construct) ends an initial task, otherwise an
        // ordinary implicit task.
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
164 #endif
165 
166 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
167  __kmp_wait_* must make certain that another thread calls __kmp_release
168  to wake it back up to prevent deadlocks!
169 
170  NOTE: We may not belong to a team at this point. */
// Core wait loop shared by all flag kinds. Template parameters:
//   C          - concrete flag class (provides done_check/suspend/etc.)
//   final_spin - true on the last wait of an implicit task (fork barrier)
//   Cancellable- return true (instead of looping) when the enclosing
//                parallel region is cancelled
//   Sleepable  - whether the loop is allowed to suspend the thread
// Returns false on normal completion; true only for a cancelled wait.
template <class C, bool final_spin, bool Cancellable = false,
          bool Sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  // Fast path: the flag may already have been released.
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  // Mark the thread as blocking so signal handling knows it may be parked.
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

/* OMPT Behavior:
THIS function is called from
  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
  these have join / fork behavior

  In these cases, we don't change the state or trigger events in THIS
function.
  Events are triggered in the calling code (__kmp_barrier):

  state := ompt_state_overhead
  barrier-begin
  barrier-wait-begin
  state := ompt_state_wait_barrier
  call join-barrier-implementation (finally arrive here)
  {}
  call fork-barrier-implementation (finally arrive here)
  {}
  state := ompt_state_overhead
  barrier-wait-end
  barrier-end
  state := ompt_state_work_parallel


  __kmp_fork_barrier (after thread creation, before executing implicit task)
  call fork-barrier-implementation (finally arrive here)
  {} // worker arrive here with state = ompt_state_idle


  __kmp_join_barrier (implicit barrier at end of parallel region)
  state := ompt_state_barrier_implicit
  barrier-begin
  barrier-wait-begin
  call join-barrier-implementation (finally arrive here
final_spin=FALSE)
  {
  }
  __kmp_fork_barrier (implicit barrier at end of parallel region)
  call fork-barrier-implementation (finally arrive here final_spin=TRUE)

  Worker after task-team is finished:
  barrier-wait-end
  barrier-end
  implicit-task-end
  idle-begin
  state := ompt_state_idle

  Before leaving, if state = ompt_state_idle
  idle-end
  state := ompt_state_overhead
*/
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

  // Compute a wall-clock deadline (hibernate/hibernate_goal) after which the
  // thread is allowed to actually sleep, but only when blocktime is finite
  // or the runtime is soft-paused.
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment make certain that this thread doesn't go to sleep too
       soon.  */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
    (void)poll_count;
#endif // KMP_USE_MONITOR
  }

  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    // While waiting, steal and execute tasks if tasking is enabled.
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region.  This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was
         manually disabled (KMP_TASKING=0).  */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be catched above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    // Bail out if the whole library is shutting down (or aborting).
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (Cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // For hidden helper thread, if task_team is nullptr, it means the main
    // thread has not released the barrier. We cannot wait here because once
    // the main thread releases all children barriers, all hidden helper
    // threads are still sleeping. This leads to a problem that following
    // configuration, such as task team sync, will not be performed such that
    // this thread does not have task team. Usually it is not bad. However, a
    // corner case is, when the first task encountered is an untied task, the
    // check in __kmp_task_alloc will crash because it uses the task team
    // pointer without checking whether it is nullptr. It is probably under
    // some kind of assumption.
    if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
        !TCR_4(__kmp_hidden_helper_team_done)) {
      // If there is still hidden helper tasks to be executed, the hidden
      // helper thread will not enter a waiting status.
      if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
        __kmp_hidden_helper_worker_thread_wait();
      }
      continue;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!Sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    // Prefer the lightweight monitor/mwait sleep when the hardware allows it.
    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
      flag->mwait(th_gtid);
    } else {
#endif
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
      flag->suspend(th_gtid);
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    }
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}
512 
513 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
514 // Set up a monitor on the flag variable causing the calling thread to wait in
515 // a less active state until the flag variable is modified.
// Park the calling thread with monitor/mwait (or umonitor/umwait) until the
// flag's cache line is written. The monitor -> re-check -> mwait sequence
// below is race-sensitive: do not reorder it.
template <class C>
static inline void __kmp_mwait_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
  kmp_info_t *th = __kmp_threads[th_gtid];

  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  // User-level mwait is available
  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);

  __kmp_suspend_initialize_thread(th);
  __kmp_lock_suspend_mx(th);

  volatile void *spin = flag->get();
  // monitor arms a whole cache line, so round the flag address down to its
  // cache-line base.
  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));

  if (!flag->done_check()) {
    // Mark thread as no longer active
    th->th.th_active = FALSE;
    if (th->th.th_active_in_pool) {
      th->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
    }
    // Tell releasers this flag has a sleeper that may need waking.
    flag->set_sleeping();
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
#if KMP_HAVE_UMWAIT
    if (__kmp_umwait_enabled) {
      __kmp_umonitor(cacheline);
    }
#elif KMP_HAVE_MWAIT
    if (__kmp_mwait_enabled) {
      __kmp_mm_monitor(cacheline, 0, 0);
    }
#endif
    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
    // the address could happen after the last time we checked and before
    // monitoring started, in which case monitor can't detect the change.
    if (flag->done_check())
      flag->unset_sleeping();
    else {
      // if flag changes here, wake-up happens immediately
      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
      __kmp_unlock_suspend_mx(th);
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
      }
#elif KMP_HAVE_MWAIT
      if (__kmp_mwait_enabled) {
        __kmp_mm_mwait(0, __kmp_mwait_hints);
      }
#endif
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
      __kmp_lock_suspend_mx(th);
      // Clean up sleep info; doesn't matter how/why this thread stopped
      // waiting
      if (flag->is_sleeping())
        flag->unset_sleeping();
      TCW_PTR(th->th.th_sleep_loc, NULL);
    }
    // Mark thread as active again
    th->th.th_active = TRUE;
    if (TCR_4(th->th.th_in_pool)) {
      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
      th->th.th_active_in_pool = TRUE;
    }
  } // Drop out to main wait loop to check flag, handle tasks, etc.
  __kmp_unlock_suspend_mx(th);
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
}
588 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
589 
590 /* Release any threads specified as waiting on the flag by releasing the flag
591  and resume the waiting thread if indicated by the sleep bit(s). A thread that
592  calls __kmp_wait_template must call this function to wake up the potentially
593  sleeping thread and prevent deadlocks! */
594 template <class C> static inline void __kmp_release_template(C *flag) {
595 #ifdef KMP_DEBUG
596  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
597 #endif
598  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
599  KMP_DEBUG_ASSERT(flag->get());
600  KMP_FSYNC_RELEASING(flag->get_void_p());
601 
602  flag->internal_release();
603 
604  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
605  flag->load()));
606 
607  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
608  // Only need to check sleep stuff if infinite block time not set.
609  // Are *any* threads waiting on flag sleeping?
610  if (flag->is_any_sleeping()) {
611  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
612  // if sleeping waiter exists at i, sets current_waiter to i inside flag
613  kmp_info_t *waiter = flag->get_waiter(i);
614  if (waiter) {
615  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
616  // Wake up thread if needed
617  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
618  "flag(%p) set\n",
619  gtid, wait_gtid, flag->get()));
620  flag->resume(wait_gtid); // unsets flag's current_waiter when done
621  }
622  }
623  }
624  }
625 }
626 
// Primary template intentionally empty: only the kmp_uint32 / kmp_uint64
// specializations below are meant to be instantiated.
template <typename FlagType> struct flag_traits {};
628 
629 template <> struct flag_traits<kmp_uint32> {
630  typedef kmp_uint32 flag_t;
631  static const flag_type t = flag32;
632  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
633  static inline flag_t test_then_add4(volatile flag_t *f) {
634  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
635  }
636  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
637  return KMP_TEST_THEN_OR32(f, v);
638  }
639  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
640  return KMP_TEST_THEN_AND32(f, v);
641  }
642 };
643 
644 template <> struct flag_traits<kmp_uint64> {
645  typedef kmp_uint64 flag_t;
646  static const flag_type t = flag64;
647  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
648  static inline flag_t test_then_add4(volatile flag_t *f) {
649  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
650  }
651  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
652  return KMP_TEST_THEN_OR64(f, v);
653  }
654  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
655  return KMP_TEST_THEN_AND64(f, v);
656  }
657 };
658 
659 // Basic flag that does not use C11 Atomics
660 template <typename FlagType, bool Sleepable>
661 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
662  typedef flag_traits<FlagType> traits_type;
663  FlagType checker;
665  kmp_info_t
666  *waiting_threads[1];
667  kmp_uint32
668  num_waiting_threads;
669 public:
670  kmp_basic_flag_native(volatile FlagType *p)
671  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
672  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
673  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
674  waiting_threads[0] = thr;
675  }
676  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
677  : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
678  num_waiting_threads(0) {}
683  kmp_info_t *get_waiter(kmp_uint32 i) {
684  KMP_DEBUG_ASSERT(i < num_waiting_threads);
685  return waiting_threads[i];
686  }
690  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
696  void set_waiter(kmp_info_t *thr) {
697  waiting_threads[0] = thr;
698  num_waiting_threads = 1;
699  }
703  bool done_check() {
704  if (Sleepable)
705  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
706  checker;
707  else
708  return traits_type::tcr(*(this->get())) == checker;
709  }
714  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
722  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
727  void internal_release() {
728  (void)traits_type::test_then_add4((volatile FlagType *)this->get());
729  }
735  FlagType set_sleeping() {
736  return traits_type::test_then_or((volatile FlagType *)this->get(),
737  KMP_BARRIER_SLEEP_STATE);
738  }
744  FlagType unset_sleeping() {
745  return traits_type::test_then_and((volatile FlagType *)this->get(),
746  ~KMP_BARRIER_SLEEP_STATE);
747  }
752  bool is_sleeping_val(FlagType old_loc) {
753  return old_loc & KMP_BARRIER_SLEEP_STATE;
754  }
758  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
759  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
760  kmp_uint8 *get_stolen() { return NULL; }
761  enum barrier_type get_bt() { return bs_last_barrier; }
762 };
763 
764 template <typename FlagType, bool Sleepable>
765 class kmp_basic_flag : public kmp_flag<FlagType> {
766  typedef flag_traits<FlagType> traits_type;
767  FlagType checker;
769  kmp_info_t
770  *waiting_threads[1];
771  kmp_uint32
772  num_waiting_threads;
773 public:
774  kmp_basic_flag(std::atomic<FlagType> *p)
775  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
776  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
777  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
778  waiting_threads[0] = thr;
779  }
780  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
781  : kmp_flag<FlagType>(p, traits_type::t), checker(c),
782  num_waiting_threads(0) {}
787  kmp_info_t *get_waiter(kmp_uint32 i) {
788  KMP_DEBUG_ASSERT(i < num_waiting_threads);
789  return waiting_threads[i];
790  }
794  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
800  void set_waiter(kmp_info_t *thr) {
801  waiting_threads[0] = thr;
802  num_waiting_threads = 1;
803  }
807  bool done_check() {
808  if (Sleepable)
809  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
810  else
811  return this->load() == checker;
812  }
817  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
825  bool notdone_check() { return this->load() != checker; }
830  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
836  FlagType set_sleeping() {
837  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
838  }
844  FlagType unset_sleeping() {
845  return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
846  }
851  bool is_sleeping_val(FlagType old_loc) {
852  return old_loc & KMP_BARRIER_SLEEP_STATE;
853  }
857  bool is_sleeping() { return is_sleeping_val(this->load()); }
858  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
859  kmp_uint8 *get_stolen() { return NULL; }
860  enum barrier_type get_bt() { return bs_last_barrier; }
861 };
862 
// Concrete 32-bit atomic flag. Binds the generic wait/release templates to
// the 32-bit suspend/resume/task-execution entry points.
template <bool Cancellable, bool Sleepable>
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
  // Park this thread until the flag is released.
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  // Lighter-weight park using monitor/mwait.
  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif
  // Wake a thread sleeping on this flag.
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  // Execute available tasks while waiting on this flag.
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  // Wait until done; returns true only when a Cancellable wait is cancelled.
  // final_spin is runtime here, so it is lowered to the compile-time
  // template argument via this branch.
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};
896 
// Concrete 64-bit flag over a plain volatile word (native, non-atomic
// variant). Binds the generic wait/release templates to the 64-bit
// suspend/resume/task-execution entry points.
template <bool Cancellable, bool Sleepable>
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
public:
  kmp_flag_64(volatile kmp_uint64 *p)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
  // Park this thread until the flag is released.
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  // Lighter-weight park using monitor/mwait.
  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
#endif
  // Wake a thread sleeping on this flag.
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  // Execute available tasks while waiting on this flag.
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  // Wait until done; returns true only when a Cancellable wait is cancelled.
  // final_spin is runtime here, so it is lowered to the compile-time
  // template argument via this branch.
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
930 
// Hierarchical 64-bit on-core barrier instantiation: several threads on a
// core share one 64-bit word, and each thread owns one byte of it
// (selected by `offset`). A thread is released when its byte reaches the
// checker value.
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker; // per-byte value that signals "done"
  kmp_info_t *waiting_threads[1]; // thread parked on this flag (at most one)
  kmp_uint32 num_waiting_threads; // valid entries in waiting_threads
  kmp_uint32
      offset; // which byte of the shared 64-bit word belongs to this thread
  bool flag_switch; // set when this thread must switch to waiting on its
                    // own b_go flag (see notdone_check)
  enum barrier_type bt; // barrier type; indexes th_bar in notdone_check
  kmp_info_t *this_thr; // the waiting thread (used for the flag switch)
#if USE_ITT_BUILD
  void *
      itt_sync_obj; // ITT object forwarded into the switched-to wait
#endif
  // View one byte of the 64-bit flag word.
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  // NOTE(review): leaves checker, offset, bt and this_thr uninitialized;
  // callers must not invoke methods that read them on this form.
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  // NOTE(review): checker, bt and this_thr remain uninitialized here.
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  // Fully-initialized form used for actual waits.
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  // Return the i-th registered waiter; i must be in range.
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  // Register thr as the single waiter on this flag.
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  // Done when this thread's byte of the word equals checker.
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  // Not-done check with flag-switch handling: when the barrier tells this
  // thread to switch to its own b_go flag, wait on that flag instead.
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  // Set this thread's byte to 1. With finite blocktime the write must be an
  // atomic OR, since other threads update their own bytes of the same word.
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  // Atomically set/clear the sleep bit on the whole word.
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  // Lower runtime final_spin to the compile-time template argument.
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  // Execute available tasks while waiting; with OMPD, also fire the
  // task-end breakpoint hook after execution.
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
#if OMPD_SUPPORT
    int ret = __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_end();
    return ret;
#else
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
1045 
1046 // Used to wake up threads, volatile void* flag is usually the th_sleep_loc
1047 // associated with int gtid.
1048 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
1049  if (!flag)
1050  return;
1051 
1052  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
1053  case flag32:
1054  __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
1055  break;
1056  case flag64:
1057  __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
1058  break;
1059  case flag_oncore:
1060  __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
1061  break;
1062  }
1063 }
1064 
1069 #endif // KMP_WAIT_RELEASE_H
flag_properties t
std::atomic< P > * loc
void * get_void_p()
flag_type get_type()
void store(P val)
flag_type
stats_state_e
the states which a thread can be in
Definition: kmp_stats.h:63