1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <stddef.h>
14 #include <atomic>
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_io.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #if KMP_USE_FUTEX
25 #include <sys/syscall.h>
26 #include <unistd.h>
27 // We should really include <futex.h>, but that causes compatibility problems on
28 // different Linux* OS distributions that either require that you include (or
29 // break when you try to include) <pci/types.h>. Since all we need is the two
30 // macros below (which are part of the kernel ABI, so can't change) we just
31 // define the constants here and don't include <futex.h>
32 #ifndef FUTEX_WAIT
33 #define FUTEX_WAIT 0
34 #endif
35 #ifndef FUTEX_WAKE
36 #define FUTEX_WAKE 1
37 #endif
38 #endif
39 
40 /* Implement spin locks for internal library use. */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
42 
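#if 0
// Illustrative sketch only (not used by the runtime): the core of the
// Lamport-style ticket/bakery scheme that the ticket locks later in this
// file implement on top of the kmp_* atomics.
struct example_ticket_lock {
  std::atomic<unsigned> next_ticket{0}; // next ticket to hand out
  std::atomic<unsigned> now_serving{0}; // ticket currently allowed to run

  void acquire() {
    // Draw a ticket; FIFO ordering falls out of the monotonically
    // increasing counter.
    unsigned my_ticket = next_ticket.fetch_add(1, std::memory_order_relaxed);
    // Spin until it is our turn; the acquire load pairs with the release
    // increment in release().
    while (now_serving.load(std::memory_order_acquire) != my_ticket) {
    }
  }
  void release() {
    // Publish the critical section and admit the next waiter.
    now_serving.fetch_add(1, std::memory_order_release);
  }
};
#endif
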
43 void __kmp_validate_locks(void) {
44  int i;
45  kmp_uint32 x, y;
46 
  /* Check to make sure unsigned arithmetic wraps properly */
48  x = ~((kmp_uint32)0) - 2;
49  y = x - 2;
50 
51  for (i = 0; i < 8; ++i, ++x, ++y) {
52  kmp_uint32 z = (x - y);
53  KMP_ASSERT(z == 2);
54  }
55 
56  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57 }
58 
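#if 0
// Worked example (illustrative only) of the wrap check above: the unsigned
// difference stays 2 even when x wraps past zero.
static void example_wraparound_check(void) {
  kmp_uint32 x = 0xFFFFFFFDu; // ~((kmp_uint32)0) - 2
  kmp_uint32 y = 0xFFFFFFFBu; // x - 2
  x += 3; // x wraps around: 0xFFFFFFFD + 3 == 0x00000000
  y += 3; // y == 0xFFFFFFFE
  KMP_ASSERT((kmp_uint32)(x - y) == 2); // 0 - 0xFFFFFFFE == 2 (mod 2^32)
}
#endif
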
59 /* ------------------------------------------------------------------------ */
60 /* test and set locks */
61 
62 // For the non-nested locks, we can only assume that the first 4 bytes were
63 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
65 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66 //
67 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
68 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
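
#if 0
// Illustrative sketch only, not the runtime's actual dispatch code: how the
// size guarantee above is typically honored when a user's omp_lock_t is
// handed to this layer. example_init_user_lock() is hypothetical.
static void example_init_user_lock(omp_lock_t *user_lock) {
  if (sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) {
    // The internal lock fits in the storage the compiler reserved for the
    // user's lock variable, so it can live there directly.
    __kmp_init_tas_lock((kmp_tas_lock_t *)user_lock);
  } else {
    // Too big for the reserved space: the real lock would be allocated
    // elsewhere and only a pointer kept in *user_lock.
  }
}
#endif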
69 
70 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72 }
73 
74 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75  return lck->lk.depth_locked != -1;
76 }
77 
78 __forceinline static int
79 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80  KMP_MB();
81 
82 #ifdef USE_LOCK_PROFILE
83  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84  if ((curr != 0) && (curr != gtid + 1))
85  __kmp_printf("LOCK CONTENTION: %p\n", lck);
86 /* else __kmp_printf( "." );*/
87 #endif /* USE_LOCK_PROFILE */
88 
89  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91 
92  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93  __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94  KMP_FSYNC_ACQUIRED(lck);
95  return KMP_LOCK_ACQUIRED_FIRST;
96  }
97 
98  kmp_uint32 spins;
99  KMP_FSYNC_PREPARE(lck);
100  KMP_INIT_YIELD(spins);
101  kmp_backoff_t backoff = __kmp_spin_backoff_params;
102  do {
103  __kmp_spin_backoff(&backoff);
104  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
105  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
106  !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
107  KMP_FSYNC_ACQUIRED(lck);
108  return KMP_LOCK_ACQUIRED_FIRST;
109 }
110 
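#if 0
// Distilled version of the acquire path above (illustrative only): a
// test-and-test-and-set spin written with plain std::atomic instead of the
// KMP_ wrappers, with a placeholder where the runtime inserts its backoff.
static void example_ttas_acquire(std::atomic<kmp_int32> &poll,
                                 kmp_int32 busy_value) {
  const kmp_int32 free_value = 0;
  kmp_int32 expected = free_value;
  // Fast path: the lock looks free and the CAS claims it.
  if (poll.load(std::memory_order_relaxed) == free_value &&
      poll.compare_exchange_strong(expected, busy_value,
                                   std::memory_order_acquire))
    return;
  // Slow path: only attempt the (expensive) CAS after the cheap relaxed load
  // has seen the lock become free; back off between attempts.
  do {
    // __kmp_spin_backoff()/KMP_YIELD_OVERSUB_ELSE_SPIN() would go here.
    expected = free_value;
  } while (poll.load(std::memory_order_relaxed) != free_value ||
           !poll.compare_exchange_strong(expected, busy_value,
                                         std::memory_order_acquire));
}
#endif
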
111 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
112  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
113  return retval;
114 }
115 
116 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
117  kmp_int32 gtid) {
118  char const *const func = "omp_set_lock";
119  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
120  __kmp_is_tas_lock_nestable(lck)) {
121  KMP_FATAL(LockNestableUsedAsSimple, func);
122  }
123  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
124  KMP_FATAL(LockIsAlreadyOwned, func);
125  }
126  return __kmp_acquire_tas_lock(lck, gtid);
127 }
128 
129 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
130  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
131  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
132  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
133  __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
134  KMP_FSYNC_ACQUIRED(lck);
135  return TRUE;
136  }
137  return FALSE;
138 }
139 
140 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
141  kmp_int32 gtid) {
142  char const *const func = "omp_test_lock";
143  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
144  __kmp_is_tas_lock_nestable(lck)) {
145  KMP_FATAL(LockNestableUsedAsSimple, func);
146  }
147  return __kmp_test_tas_lock(lck, gtid);
148 }
149 
150 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
151  KMP_MB(); /* Flush all pending memory write invalidates. */
152 
153  KMP_FSYNC_RELEASING(lck);
154  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
155  KMP_MB(); /* Flush all pending memory write invalidates. */
156 
157  KMP_YIELD_OVERSUB();
158  return KMP_LOCK_RELEASED;
159 }
160 
161 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
162  kmp_int32 gtid) {
163  char const *const func = "omp_unset_lock";
164  KMP_MB(); /* in case another processor initialized lock */
165  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
166  __kmp_is_tas_lock_nestable(lck)) {
167  KMP_FATAL(LockNestableUsedAsSimple, func);
168  }
169  if (__kmp_get_tas_lock_owner(lck) == -1) {
170  KMP_FATAL(LockUnsettingFree, func);
171  }
172  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
173  (__kmp_get_tas_lock_owner(lck) != gtid)) {
174  KMP_FATAL(LockUnsettingSetByAnother, func);
175  }
176  return __kmp_release_tas_lock(lck, gtid);
177 }
178 
179 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
180  lck->lk.poll = KMP_LOCK_FREE(tas);
181 }
182 
183 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
184 
185 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
186  char const *const func = "omp_destroy_lock";
187  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
188  __kmp_is_tas_lock_nestable(lck)) {
189  KMP_FATAL(LockNestableUsedAsSimple, func);
190  }
191  if (__kmp_get_tas_lock_owner(lck) != -1) {
192  KMP_FATAL(LockStillOwned, func);
193  }
194  __kmp_destroy_tas_lock(lck);
195 }
196 
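#if 0
// Illustrative driver only: how the plain (non-nested) TAS entry points above
// fit together. The gtid is whatever global thread id the runtime assigned to
// the calling thread.
static void example_tas_usage(kmp_int32 gtid) {
  kmp_tas_lock_t lock;
  __kmp_init_tas_lock(&lock);
  if (!__kmp_test_tas_lock(&lock, gtid)) // non-blocking attempt
    __kmp_acquire_tas_lock(&lock, gtid); // blocking acquire
  // ... critical section ...
  __kmp_release_tas_lock(&lock, gtid);
  __kmp_destroy_tas_lock(&lock);
}
#endif
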
197 // nested test and set locks
198 
199 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
200  KMP_DEBUG_ASSERT(gtid >= 0);
201 
202  if (__kmp_get_tas_lock_owner(lck) == gtid) {
203  lck->lk.depth_locked += 1;
204  return KMP_LOCK_ACQUIRED_NEXT;
205  } else {
206  __kmp_acquire_tas_lock_timed_template(lck, gtid);
207  lck->lk.depth_locked = 1;
208  return KMP_LOCK_ACQUIRED_FIRST;
209  }
210 }
211 
212 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
213  kmp_int32 gtid) {
214  char const *const func = "omp_set_nest_lock";
215  if (!__kmp_is_tas_lock_nestable(lck)) {
216  KMP_FATAL(LockSimpleUsedAsNestable, func);
217  }
218  return __kmp_acquire_nested_tas_lock(lck, gtid);
219 }
220 
221 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
222  int retval;
223 
224  KMP_DEBUG_ASSERT(gtid >= 0);
225 
226  if (__kmp_get_tas_lock_owner(lck) == gtid) {
227  retval = ++lck->lk.depth_locked;
228  } else if (!__kmp_test_tas_lock(lck, gtid)) {
229  retval = 0;
230  } else {
231  KMP_MB();
232  retval = lck->lk.depth_locked = 1;
233  }
234  return retval;
235 }
236 
237 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
238  kmp_int32 gtid) {
239  char const *const func = "omp_test_nest_lock";
240  if (!__kmp_is_tas_lock_nestable(lck)) {
241  KMP_FATAL(LockSimpleUsedAsNestable, func);
242  }
243  return __kmp_test_nested_tas_lock(lck, gtid);
244 }
245 
246 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
247  KMP_DEBUG_ASSERT(gtid >= 0);
248 
249  KMP_MB();
250  if (--(lck->lk.depth_locked) == 0) {
251  __kmp_release_tas_lock(lck, gtid);
252  return KMP_LOCK_RELEASED;
253  }
254  return KMP_LOCK_STILL_HELD;
255 }
256 
257 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
258  kmp_int32 gtid) {
259  char const *const func = "omp_unset_nest_lock";
260  KMP_MB(); /* in case another processor initialized lock */
261  if (!__kmp_is_tas_lock_nestable(lck)) {
262  KMP_FATAL(LockSimpleUsedAsNestable, func);
263  }
264  if (__kmp_get_tas_lock_owner(lck) == -1) {
265  KMP_FATAL(LockUnsettingFree, func);
266  }
267  if (__kmp_get_tas_lock_owner(lck) != gtid) {
268  KMP_FATAL(LockUnsettingSetByAnother, func);
269  }
270  return __kmp_release_nested_tas_lock(lck, gtid);
271 }
272 
273 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
274  __kmp_init_tas_lock(lck);
275  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
276 }
277 
278 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
279  __kmp_destroy_tas_lock(lck);
280  lck->lk.depth_locked = 0;
281 }
282 
283 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
284  char const *const func = "omp_destroy_nest_lock";
285  if (!__kmp_is_tas_lock_nestable(lck)) {
286  KMP_FATAL(LockSimpleUsedAsNestable, func);
287  }
288  if (__kmp_get_tas_lock_owner(lck) != -1) {
289  KMP_FATAL(LockStillOwned, func);
290  }
291  __kmp_destroy_nested_tas_lock(lck);
292 }
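
#if 0
// Illustrative driver only: the depth bookkeeping the nested TAS entry points
// above maintain. Re-acquisition by the owner just bumps the count; the
// underlying lock is released only when the count drops back to zero.
static void example_nested_tas_usage(kmp_int32 gtid) {
  kmp_tas_lock_t lock;
  __kmp_init_nested_tas_lock(&lock);                // depth_locked == 0
  __kmp_acquire_nested_tas_lock(&lock, gtid);       // first acquire, depth 1
  __kmp_acquire_nested_tas_lock(&lock, gtid);       // re-entry, depth 2
  (void)__kmp_release_nested_tas_lock(&lock, gtid); // KMP_LOCK_STILL_HELD
  (void)__kmp_release_nested_tas_lock(&lock, gtid); // KMP_LOCK_RELEASED
  __kmp_destroy_nested_tas_lock(&lock);
}
#endif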
293 
294 #if KMP_USE_FUTEX
295 
296 /* ------------------------------------------------------------------------ */
297 /* futex locks */
298 
299 // futex locks are really just test and set locks, with a different method
300 // of handling contention. They take the same amount of space as test and
301 // set locks, and are allocated the same way (i.e. use the area allocated by
302 // the compiler for non-nested locks / allocate nested locks on the heap).
303 
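#if 0
// Illustrative sketch only: a minimal three-state futex mutex showing the
// kernel interaction the lock below relies on (0 = free, 1 = held,
// 2 = held with waiters). The real lock word instead stores
// (gtid + 1) << 1 for the owner and uses bit 0 as the "waiters" flag, but
// the futex_wait/futex_wake pattern is the same.
static void example_futex_acquire(std::atomic<int> *word) {
  int c = 0;
  if (word->compare_exchange_strong(c, 1, std::memory_order_acquire))
    return; // uncontended: no syscall needed
  // Contended: make sure the word says "held, with waiters", then sleep
  // until we are the thread that flips it from free back to that state.
  if (c != 2)
    c = word->exchange(2, std::memory_order_acquire);
  while (c != 0) {
    syscall(__NR_futex, word, FUTEX_WAIT, 2, NULL, NULL, 0);
    c = word->exchange(2, std::memory_order_acquire);
  }
}
static void example_futex_release(std::atomic<int> *word) {
  if (word->exchange(0, std::memory_order_release) == 2)
    // Some thread recorded itself as waiting: wake one of them.
    syscall(__NR_futex, word, FUTEX_WAKE, 1, NULL, NULL, 0);
}
#endif
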
304 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
305  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
306 }
307 
308 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
309  return lck->lk.depth_locked != -1;
310 }
311 
312 __forceinline static int
313 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
314  kmp_int32 gtid_code = (gtid + 1) << 1;
315 
316  KMP_MB();
317 
318 #ifdef USE_LOCK_PROFILE
319  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
320  if ((curr != 0) && (curr != gtid_code))
321  __kmp_printf("LOCK CONTENTION: %p\n", lck);
322 /* else __kmp_printf( "." );*/
323 #endif /* USE_LOCK_PROFILE */
324 
325  KMP_FSYNC_PREPARE(lck);
326  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
327  lck, lck->lk.poll, gtid));
328 
329  kmp_int32 poll_val;
330 
331  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
332  &(lck->lk.poll), KMP_LOCK_FREE(futex),
333  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
334 
335  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
336  KA_TRACE(
337  1000,
338  ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
339  lck, gtid, poll_val, cond));
340 
    // NOTE: do not write this branch condition as
    //
    //   if ( poll_val & 1 == 0 )
    //
    // Because == binds more tightly than &, that expression parses as
    // poll_val & (1 == 0), which is always 0, so the block would always be
    // skipped regardless of the value of the LSB of poll_val; hence the
    // separate 'cond' variable above.
347  if (!cond) {
348  // Try to set the lsb in the poll to indicate to the owner
349  // thread that they need to wake this thread up.
350  if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
351  poll_val | KMP_LOCK_BUSY(1, futex))) {
352  KA_TRACE(
353  1000,
354  ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
355  lck, lck->lk.poll, gtid));
356  continue;
357  }
358  poll_val |= KMP_LOCK_BUSY(1, futex);
359 
360  KA_TRACE(1000,
361  ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
362  lck->lk.poll, gtid));
363  }
364 
365  KA_TRACE(
366  1000,
367  ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
368  lck, gtid, poll_val));
369 
370  long rc;
371  if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
372  NULL, 0)) != 0) {
373  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
374  "failed (rc=%ld errno=%d)\n",
375  lck, gtid, poll_val, rc, errno));
376  continue;
377  }
378 
379  KA_TRACE(1000,
380  ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
381  lck, gtid, poll_val));
382  // This thread has now done a successful futex wait call and was entered on
383  // the OS futex queue. We must now perform a futex wake call when releasing
384  // the lock, as we have no idea how many other threads are in the queue.
385  gtid_code |= 1;
386  }
387 
388  KMP_FSYNC_ACQUIRED(lck);
389  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
390  lck->lk.poll, gtid));
391  return KMP_LOCK_ACQUIRED_FIRST;
392 }
393 
394 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
395  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
396  return retval;
397 }
398 
399 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
400  kmp_int32 gtid) {
401  char const *const func = "omp_set_lock";
402  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
403  __kmp_is_futex_lock_nestable(lck)) {
404  KMP_FATAL(LockNestableUsedAsSimple, func);
405  }
406  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
407  KMP_FATAL(LockIsAlreadyOwned, func);
408  }
409  return __kmp_acquire_futex_lock(lck, gtid);
410 }
411 
412 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
413  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
414  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
415  KMP_FSYNC_ACQUIRED(lck);
416  return TRUE;
417  }
418  return FALSE;
419 }
420 
421 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
422  kmp_int32 gtid) {
423  char const *const func = "omp_test_lock";
424  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
425  __kmp_is_futex_lock_nestable(lck)) {
426  KMP_FATAL(LockNestableUsedAsSimple, func);
427  }
428  return __kmp_test_futex_lock(lck, gtid);
429 }
430 
431 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
432  KMP_MB(); /* Flush all pending memory write invalidates. */
433 
434  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
435  lck, lck->lk.poll, gtid));
436 
437  KMP_FSYNC_RELEASING(lck);
438 
439  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
440 
441  KA_TRACE(1000,
442  ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
443  lck, gtid, poll_val));
444 
445  if (KMP_LOCK_STRIP(poll_val) & 1) {
446  KA_TRACE(1000,
447  ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
448  lck, gtid));
449  syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
450  NULL, NULL, 0);
451  }
452 
453  KMP_MB(); /* Flush all pending memory write invalidates. */
454 
455  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
456  lck->lk.poll, gtid));
457 
458  KMP_YIELD_OVERSUB();
459  return KMP_LOCK_RELEASED;
460 }
461 
462 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
463  kmp_int32 gtid) {
464  char const *const func = "omp_unset_lock";
465  KMP_MB(); /* in case another processor initialized lock */
466  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
467  __kmp_is_futex_lock_nestable(lck)) {
468  KMP_FATAL(LockNestableUsedAsSimple, func);
469  }
470  if (__kmp_get_futex_lock_owner(lck) == -1) {
471  KMP_FATAL(LockUnsettingFree, func);
472  }
473  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
474  (__kmp_get_futex_lock_owner(lck) != gtid)) {
475  KMP_FATAL(LockUnsettingSetByAnother, func);
476  }
477  return __kmp_release_futex_lock(lck, gtid);
478 }
479 
480 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
481  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
482 }
483 
484 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
485 
486 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
487  char const *const func = "omp_destroy_lock";
488  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
489  __kmp_is_futex_lock_nestable(lck)) {
490  KMP_FATAL(LockNestableUsedAsSimple, func);
491  }
492  if (__kmp_get_futex_lock_owner(lck) != -1) {
493  KMP_FATAL(LockStillOwned, func);
494  }
495  __kmp_destroy_futex_lock(lck);
496 }
497 
498 // nested futex locks
499 
500 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
501  KMP_DEBUG_ASSERT(gtid >= 0);
502 
503  if (__kmp_get_futex_lock_owner(lck) == gtid) {
504  lck->lk.depth_locked += 1;
505  return KMP_LOCK_ACQUIRED_NEXT;
506  } else {
507  __kmp_acquire_futex_lock_timed_template(lck, gtid);
508  lck->lk.depth_locked = 1;
509  return KMP_LOCK_ACQUIRED_FIRST;
510  }
511 }
512 
513 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
514  kmp_int32 gtid) {
515  char const *const func = "omp_set_nest_lock";
516  if (!__kmp_is_futex_lock_nestable(lck)) {
517  KMP_FATAL(LockSimpleUsedAsNestable, func);
518  }
519  return __kmp_acquire_nested_futex_lock(lck, gtid);
520 }
521 
522 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
523  int retval;
524 
525  KMP_DEBUG_ASSERT(gtid >= 0);
526 
527  if (__kmp_get_futex_lock_owner(lck) == gtid) {
528  retval = ++lck->lk.depth_locked;
529  } else if (!__kmp_test_futex_lock(lck, gtid)) {
530  retval = 0;
531  } else {
532  KMP_MB();
533  retval = lck->lk.depth_locked = 1;
534  }
535  return retval;
536 }
537 
538 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
539  kmp_int32 gtid) {
540  char const *const func = "omp_test_nest_lock";
541  if (!__kmp_is_futex_lock_nestable(lck)) {
542  KMP_FATAL(LockSimpleUsedAsNestable, func);
543  }
544  return __kmp_test_nested_futex_lock(lck, gtid);
545 }
546 
547 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
548  KMP_DEBUG_ASSERT(gtid >= 0);
549 
550  KMP_MB();
551  if (--(lck->lk.depth_locked) == 0) {
552  __kmp_release_futex_lock(lck, gtid);
553  return KMP_LOCK_RELEASED;
554  }
555  return KMP_LOCK_STILL_HELD;
556 }
557 
558 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
559  kmp_int32 gtid) {
560  char const *const func = "omp_unset_nest_lock";
561  KMP_MB(); /* in case another processor initialized lock */
562  if (!__kmp_is_futex_lock_nestable(lck)) {
563  KMP_FATAL(LockSimpleUsedAsNestable, func);
564  }
565  if (__kmp_get_futex_lock_owner(lck) == -1) {
566  KMP_FATAL(LockUnsettingFree, func);
567  }
568  if (__kmp_get_futex_lock_owner(lck) != gtid) {
569  KMP_FATAL(LockUnsettingSetByAnother, func);
570  }
571  return __kmp_release_nested_futex_lock(lck, gtid);
572 }
573 
574 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
575  __kmp_init_futex_lock(lck);
576  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
577 }
578 
579 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
580  __kmp_destroy_futex_lock(lck);
581  lck->lk.depth_locked = 0;
582 }
583 
584 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
585  char const *const func = "omp_destroy_nest_lock";
586  if (!__kmp_is_futex_lock_nestable(lck)) {
587  KMP_FATAL(LockSimpleUsedAsNestable, func);
588  }
589  if (__kmp_get_futex_lock_owner(lck) != -1) {
590  KMP_FATAL(LockStillOwned, func);
591  }
592  __kmp_destroy_nested_futex_lock(lck);
593 }
594 
595 #endif // KMP_USE_FUTEX
596 
597 /* ------------------------------------------------------------------------ */
598 /* ticket (bakery) locks */
599 
600 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
601  return std::atomic_load_explicit(&lck->lk.owner_id,
602  std::memory_order_relaxed) -
603  1;
604 }
605 
606 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
607  return std::atomic_load_explicit(&lck->lk.depth_locked,
608  std::memory_order_relaxed) != -1;
609 }
610 
611 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
612  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
613  std::memory_order_acquire) == my_ticket;
614 }
615 
616 __forceinline static int
617 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
618  kmp_int32 gtid) {
619  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
620  &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
621 
622 #ifdef USE_LOCK_PROFILE
623  if (std::atomic_load_explicit(&lck->lk.now_serving,
624  std::memory_order_relaxed) != my_ticket)
625  __kmp_printf("LOCK CONTENTION: %p\n", lck);
626 /* else __kmp_printf( "." );*/
627 #endif /* USE_LOCK_PROFILE */
628 
629  if (std::atomic_load_explicit(&lck->lk.now_serving,
630  std::memory_order_acquire) == my_ticket) {
631  return KMP_LOCK_ACQUIRED_FIRST;
632  }
633  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
634  return KMP_LOCK_ACQUIRED_FIRST;
635 }
636 
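#if 0
// Illustrative only: the acquire/release pairing the ticket lock relies on.
// The owner's release increment of now_serving publishes its critical-section
// writes to the waiter whose acquire load observes the new value.
static std::atomic<unsigned> example_now_serving{0};
static int example_protected_data = 0;

static void example_owner_releases_ticket_0(void) {
  example_protected_data = 42; // write made inside the critical section
  example_now_serving.fetch_add(1, std::memory_order_release);
}
static void example_waiter_holding_ticket_1(void) {
  while (example_now_serving.load(std::memory_order_acquire) != 1) {
  }
  KMP_ASSERT(example_protected_data == 42); // visible after the acquire load
}
#endif
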
637 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
638  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
639  return retval;
640 }
641 
642 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
643  kmp_int32 gtid) {
644  char const *const func = "omp_set_lock";
645 
646  if (!std::atomic_load_explicit(&lck->lk.initialized,
647  std::memory_order_relaxed)) {
648  KMP_FATAL(LockIsUninitialized, func);
649  }
650  if (lck->lk.self != lck) {
651  KMP_FATAL(LockIsUninitialized, func);
652  }
653  if (__kmp_is_ticket_lock_nestable(lck)) {
654  KMP_FATAL(LockNestableUsedAsSimple, func);
655  }
656  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
657  KMP_FATAL(LockIsAlreadyOwned, func);
658  }
659 
660  __kmp_acquire_ticket_lock(lck, gtid);
661 
662  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
663  std::memory_order_relaxed);
664  return KMP_LOCK_ACQUIRED_FIRST;
665 }
666 
667 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
668  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
669  std::memory_order_relaxed);
670 
671  if (std::atomic_load_explicit(&lck->lk.now_serving,
672  std::memory_order_relaxed) == my_ticket) {
673  kmp_uint32 next_ticket = my_ticket + 1;
674  if (std::atomic_compare_exchange_strong_explicit(
675  &lck->lk.next_ticket, &my_ticket, next_ticket,
676  std::memory_order_acquire, std::memory_order_acquire)) {
677  return TRUE;
678  }
679  }
680  return FALSE;
681 }
682 
683 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
684  kmp_int32 gtid) {
685  char const *const func = "omp_test_lock";
686 
687  if (!std::atomic_load_explicit(&lck->lk.initialized,
688  std::memory_order_relaxed)) {
689  KMP_FATAL(LockIsUninitialized, func);
690  }
691  if (lck->lk.self != lck) {
692  KMP_FATAL(LockIsUninitialized, func);
693  }
694  if (__kmp_is_ticket_lock_nestable(lck)) {
695  KMP_FATAL(LockNestableUsedAsSimple, func);
696  }
697 
698  int retval = __kmp_test_ticket_lock(lck, gtid);
699 
700  if (retval) {
701  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
702  std::memory_order_relaxed);
703  }
704  return retval;
705 }
706 
707 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
708  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
709  std::memory_order_relaxed) -
710  std::atomic_load_explicit(&lck->lk.now_serving,
711  std::memory_order_relaxed);
712 
713  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
714  std::memory_order_release);
715 
716  KMP_YIELD(distance >
717  (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
718  return KMP_LOCK_RELEASED;
719 }
720 
721 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
722  kmp_int32 gtid) {
723  char const *const func = "omp_unset_lock";
724 
725  if (!std::atomic_load_explicit(&lck->lk.initialized,
726  std::memory_order_relaxed)) {
727  KMP_FATAL(LockIsUninitialized, func);
728  }
729  if (lck->lk.self != lck) {
730  KMP_FATAL(LockIsUninitialized, func);
731  }
732  if (__kmp_is_ticket_lock_nestable(lck)) {
733  KMP_FATAL(LockNestableUsedAsSimple, func);
734  }
735  if (__kmp_get_ticket_lock_owner(lck) == -1) {
736  KMP_FATAL(LockUnsettingFree, func);
737  }
738  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
739  (__kmp_get_ticket_lock_owner(lck) != gtid)) {
740  KMP_FATAL(LockUnsettingSetByAnother, func);
741  }
742  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
743  return __kmp_release_ticket_lock(lck, gtid);
744 }
745 
746 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
747  lck->lk.location = NULL;
748  lck->lk.self = lck;
749  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
750  std::memory_order_relaxed);
751  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
752  std::memory_order_relaxed);
753  std::atomic_store_explicit(
754  &lck->lk.owner_id, 0,
755  std::memory_order_relaxed); // no thread owns the lock.
756  std::atomic_store_explicit(
757  &lck->lk.depth_locked, -1,
758  std::memory_order_relaxed); // -1 => not a nested lock.
759  std::atomic_store_explicit(&lck->lk.initialized, true,
760  std::memory_order_release);
761 }
762 
763 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
764  std::atomic_store_explicit(&lck->lk.initialized, false,
765  std::memory_order_release);
766  lck->lk.self = NULL;
767  lck->lk.location = NULL;
768  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
769  std::memory_order_relaxed);
770  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
771  std::memory_order_relaxed);
772  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
773  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
774  std::memory_order_relaxed);
775 }
776 
777 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
778  char const *const func = "omp_destroy_lock";
779 
780  if (!std::atomic_load_explicit(&lck->lk.initialized,
781  std::memory_order_relaxed)) {
782  KMP_FATAL(LockIsUninitialized, func);
783  }
784  if (lck->lk.self != lck) {
785  KMP_FATAL(LockIsUninitialized, func);
786  }
787  if (__kmp_is_ticket_lock_nestable(lck)) {
788  KMP_FATAL(LockNestableUsedAsSimple, func);
789  }
790  if (__kmp_get_ticket_lock_owner(lck) != -1) {
791  KMP_FATAL(LockStillOwned, func);
792  }
793  __kmp_destroy_ticket_lock(lck);
794 }
795 
796 // nested ticket locks
797 
798 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
799  KMP_DEBUG_ASSERT(gtid >= 0);
800 
801  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
802  std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
803  std::memory_order_relaxed);
804  return KMP_LOCK_ACQUIRED_NEXT;
805  } else {
806  __kmp_acquire_ticket_lock_timed_template(lck, gtid);
807  std::atomic_store_explicit(&lck->lk.depth_locked, 1,
808  std::memory_order_relaxed);
809  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
810  std::memory_order_relaxed);
811  return KMP_LOCK_ACQUIRED_FIRST;
812  }
813 }
814 
815 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
816  kmp_int32 gtid) {
817  char const *const func = "omp_set_nest_lock";
818 
819  if (!std::atomic_load_explicit(&lck->lk.initialized,
820  std::memory_order_relaxed)) {
821  KMP_FATAL(LockIsUninitialized, func);
822  }
823  if (lck->lk.self != lck) {
824  KMP_FATAL(LockIsUninitialized, func);
825  }
826  if (!__kmp_is_ticket_lock_nestable(lck)) {
827  KMP_FATAL(LockSimpleUsedAsNestable, func);
828  }
829  return __kmp_acquire_nested_ticket_lock(lck, gtid);
830 }
831 
832 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
833  int retval;
834 
835  KMP_DEBUG_ASSERT(gtid >= 0);
836 
837  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
838  retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
839  std::memory_order_relaxed) +
840  1;
841  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
842  retval = 0;
843  } else {
844  std::atomic_store_explicit(&lck->lk.depth_locked, 1,
845  std::memory_order_relaxed);
846  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
847  std::memory_order_relaxed);
848  retval = 1;
849  }
850  return retval;
851 }
852 
853 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
854  kmp_int32 gtid) {
855  char const *const func = "omp_test_nest_lock";
856 
857  if (!std::atomic_load_explicit(&lck->lk.initialized,
858  std::memory_order_relaxed)) {
859  KMP_FATAL(LockIsUninitialized, func);
860  }
861  if (lck->lk.self != lck) {
862  KMP_FATAL(LockIsUninitialized, func);
863  }
864  if (!__kmp_is_ticket_lock_nestable(lck)) {
865  KMP_FATAL(LockSimpleUsedAsNestable, func);
866  }
867  return __kmp_test_nested_ticket_lock(lck, gtid);
868 }
869 
870 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
871  KMP_DEBUG_ASSERT(gtid >= 0);
872 
873  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
874  std::memory_order_relaxed) -
875  1) == 0) {
876  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
877  __kmp_release_ticket_lock(lck, gtid);
878  return KMP_LOCK_RELEASED;
879  }
880  return KMP_LOCK_STILL_HELD;
881 }
882 
883 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
884  kmp_int32 gtid) {
885  char const *const func = "omp_unset_nest_lock";
886 
887  if (!std::atomic_load_explicit(&lck->lk.initialized,
888  std::memory_order_relaxed)) {
889  KMP_FATAL(LockIsUninitialized, func);
890  }
891  if (lck->lk.self != lck) {
892  KMP_FATAL(LockIsUninitialized, func);
893  }
894  if (!__kmp_is_ticket_lock_nestable(lck)) {
895  KMP_FATAL(LockSimpleUsedAsNestable, func);
896  }
897  if (__kmp_get_ticket_lock_owner(lck) == -1) {
898  KMP_FATAL(LockUnsettingFree, func);
899  }
900  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
901  KMP_FATAL(LockUnsettingSetByAnother, func);
902  }
903  return __kmp_release_nested_ticket_lock(lck, gtid);
904 }
905 
906 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
907  __kmp_init_ticket_lock(lck);
908  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
909  std::memory_order_relaxed);
910  // >= 0 for nestable locks, -1 for simple locks
911 }
912 
913 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
914  __kmp_destroy_ticket_lock(lck);
915  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916  std::memory_order_relaxed);
917 }
918 
919 static void
920 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
921  char const *const func = "omp_destroy_nest_lock";
922 
923  if (!std::atomic_load_explicit(&lck->lk.initialized,
924  std::memory_order_relaxed)) {
925  KMP_FATAL(LockIsUninitialized, func);
926  }
927  if (lck->lk.self != lck) {
928  KMP_FATAL(LockIsUninitialized, func);
929  }
930  if (!__kmp_is_ticket_lock_nestable(lck)) {
931  KMP_FATAL(LockSimpleUsedAsNestable, func);
932  }
933  if (__kmp_get_ticket_lock_owner(lck) != -1) {
934  KMP_FATAL(LockStillOwned, func);
935  }
936  __kmp_destroy_nested_ticket_lock(lck);
937 }
938 
939 // access functions to fields which don't exist for all lock kinds.
940 
941 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
942  return lck->lk.location;
943 }
944 
945 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
946  const ident_t *loc) {
947  lck->lk.location = loc;
948 }
949 
950 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
951  return lck->lk.flags;
952 }
953 
954 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
955  kmp_lock_flags_t flags) {
956  lck->lk.flags = flags;
957 }
958 
959 /* ------------------------------------------------------------------------ */
960 /* queuing locks */
961 
/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
 */
1013 
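#if 0
// Illustrative only: the three compare-and-store shapes that implement the
// acquire transitions described above. tail_id and head_id sit next to each
// other (see the 8-byte alignment assert in __kmp_validate_locks), so the
// (head,tail) pair can also be updated as a single packed 64-bit value. This
// shows only the shapes, not the full retry/enqueue logic of the real
// acquire path below.
static void example_queuing_acquire_shapes(kmp_queuing_lock_t *lck,
                                           kmp_int32 gtid) {
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
  kmp_int32 head = *head_id_p;
  kmp_int32 tail = *tail_id_p;
  if (head == 0) {
    // (0,0) -> (-1,0): grab the free lock; only the head word changes.
    (void)KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
  } else if (head == -1) {
    // (-1,0) -> (tid,tid): the first waiter enqueues itself as both head and
    // tail; both words must change together, hence the packed 64-bit CAS.
    (void)KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
                                      KMP_PACK_64(-1, 0),
                                      KMP_PACK_64(gtid + 1, gtid + 1));
  } else if (tail != 0) {
    // (h,t) -> (h,tid): a later waiter appends itself; only the tail changes.
    (void)KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
  }
}
#endif
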
1014 #ifdef DEBUG_QUEUING_LOCKS
1015 
1016 /* Stuff for circular trace buffer */
1017 #define TRACE_BUF_ELE 1024
1018 static char traces[TRACE_BUF_ELE][128] = {0};
1019 static int tc = 0;
1020 #define TRACE_LOCK(X, Y) \
1021  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1022 #define TRACE_LOCK_T(X, Y, Z) \
1023  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1024 #define TRACE_LOCK_HT(X, Y, Z, Q) \
1025  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1026  Z, Q);
1027 
1028 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1029  kmp_queuing_lock_t *lck, kmp_int32 head_id,
1030  kmp_int32 tail_id) {
1031  kmp_int32 t, i;
1032 
1033  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1034 
1035  i = tc % TRACE_BUF_ELE;
1036  __kmp_printf_no_lock("%s\n", traces[i]);
1037  i = (i + 1) % TRACE_BUF_ELE;
1038  while (i != (tc % TRACE_BUF_ELE)) {
1039  __kmp_printf_no_lock("%s", traces[i]);
1040  i = (i + 1) % TRACE_BUF_ELE;
1041  }
1042  __kmp_printf_no_lock("\n");
1043 
1044  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1045  "next_wait:%d, head_id:%d, tail_id:%d\n",
1046  gtid + 1, this_thr->th.th_spin_here,
1047  this_thr->th.th_next_waiting, head_id, tail_id);
1048 
1049  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1050 
1051  if (lck->lk.head_id >= 1) {
1052  t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1053  while (t > 0) {
1054  __kmp_printf_no_lock("-> %d ", t);
1055  t = __kmp_threads[t - 1]->th.th_next_waiting;
1056  }
1057  }
1058  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
1059  __kmp_printf_no_lock("\n\n");
1060 }
1061 
1062 #endif /* DEBUG_QUEUING_LOCKS */
1063 
1064 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1065  return TCR_4(lck->lk.owner_id) - 1;
1066 }
1067 
1068 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1069  return lck->lk.depth_locked != -1;
1070 }
1071 
/* Acquire a lock using the queuing lock implementation */
1073 template <bool takeTime>
1074 /* [TLW] The unused template above is left behind because of what BEB believes
1075  is a potential compiler problem with __forceinline. */
1076 __forceinline static int
1077 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1078  kmp_int32 gtid) {
1079  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1080  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1081  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1082  volatile kmp_uint32 *spin_here_p;
1083 
1084 #if OMPT_SUPPORT
1085  ompt_state_t prev_state = ompt_state_undefined;
1086 #endif
1087 
1088  KA_TRACE(1000,
1089  ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1090 
1091  KMP_FSYNC_PREPARE(lck);
1092  KMP_DEBUG_ASSERT(this_thr != NULL);
1093  spin_here_p = &this_thr->th.th_spin_here;
1094 
1095 #ifdef DEBUG_QUEUING_LOCKS
1096  TRACE_LOCK(gtid + 1, "acq ent");
1097  if (*spin_here_p)
1098  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1099  if (this_thr->th.th_next_waiting != 0)
1100  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1101 #endif
1102  KMP_DEBUG_ASSERT(!*spin_here_p);
1103  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1104 
1105  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1106  head_id_p that may follow, not just in execution order, but also in
1107  visibility order. This way, when a releasing thread observes the changes to
1108  the queue by this thread, it can rightly assume that spin_here_p has
1109  already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1110  not premature. If the releasing thread sets spin_here_p to FALSE before
1111  this thread sets it to TRUE, this thread will hang. */
1112  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1113 
1114  while (1) {
1115  kmp_int32 enqueued;
1116  kmp_int32 head;
1117  kmp_int32 tail;
1118 
1119  head = *head_id_p;
1120 
1121  switch (head) {
1122 
1123  case -1: {
1124 #ifdef DEBUG_QUEUING_LOCKS
1125  tail = *tail_id_p;
1126  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1127 #endif
1128  tail = 0; /* to make sure next link asynchronously read is not set
1129  accidentally; this assignment prevents us from entering the
1130  if ( t > 0 ) condition in the enqueued case below, which is not
1131  necessary for this state transition */
1132 
1133  /* try (-1,0)->(tid,tid) */
1134  enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1135  KMP_PACK_64(-1, 0),
1136  KMP_PACK_64(gtid + 1, gtid + 1));
1137 #ifdef DEBUG_QUEUING_LOCKS
1138  if (enqueued)
1139  TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1140 #endif
1141  } break;
1142 
1143  default: {
1144  tail = *tail_id_p;
1145  KMP_DEBUG_ASSERT(tail != gtid + 1);
1146 
1147 #ifdef DEBUG_QUEUING_LOCKS
1148  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1149 #endif
1150 
1151  if (tail == 0) {
1152  enqueued = FALSE;
1153  } else {
1154  /* try (h,t) or (h,h)->(h,tid) */
1155  enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1156 
1157 #ifdef DEBUG_QUEUING_LOCKS
1158  if (enqueued)
1159  TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1160 #endif
1161  }
1162  } break;
1163 
1164  case 0: /* empty queue */
1165  {
1166  kmp_int32 grabbed_lock;
1167 
1168 #ifdef DEBUG_QUEUING_LOCKS
1169  tail = *tail_id_p;
1170  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1171 #endif
1172  /* try (0,0)->(-1,0) */
1173 
1174  /* only legal transition out of head = 0 is head = -1 with no change to
1175  * tail */
1176  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1177 
1178  if (grabbed_lock) {
1179 
1180  *spin_here_p = FALSE;
1181 
1182  KA_TRACE(
1183  1000,
1184  ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1185  lck, gtid));
1186 #ifdef DEBUG_QUEUING_LOCKS
1187  TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1188 #endif
1189 
1190 #if OMPT_SUPPORT
1191  if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1192  /* change the state before clearing wait_id */
1193  this_thr->th.ompt_thread_info.state = prev_state;
1194  this_thr->th.ompt_thread_info.wait_id = 0;
1195  }
1196 #endif
1197 
1198  KMP_FSYNC_ACQUIRED(lck);
1199  return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1200  }
1201  enqueued = FALSE;
1202  } break;
1203  }
1204 
1205 #if OMPT_SUPPORT
1206  if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1207  /* this thread will spin; set wait_id before entering wait state */
1208  prev_state = this_thr->th.ompt_thread_info.state;
1209  this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1210  this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1211  }
1212 #endif
1213 
1214  if (enqueued) {
1215  if (tail > 0) {
1216  kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1217  KMP_ASSERT(tail_thr != NULL);
1218  tail_thr->th.th_next_waiting = gtid + 1;
1219  /* corresponding wait for this write in release code */
1220  }
1221  KA_TRACE(1000,
1222  ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1223  lck, gtid));
1224 
1225  KMP_MB();
1226  // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1227  KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1228  // Synchronize writes to both runtime thread structures
1229  // and writes in user code.
1230  KMP_MB();
1231 
1232 #ifdef DEBUG_QUEUING_LOCKS
1233  TRACE_LOCK(gtid + 1, "acq spin");
1234 
1235  if (this_thr->th.th_next_waiting != 0)
1236  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1237 #endif
1238  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1239  KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1240  "waiting on queue\n",
1241  lck, gtid));
1242 
1243 #ifdef DEBUG_QUEUING_LOCKS
1244  TRACE_LOCK(gtid + 1, "acq exit 2");
1245 #endif
1246 
1247 #if OMPT_SUPPORT
1248  /* change the state before clearing wait_id */
1249  this_thr->th.ompt_thread_info.state = prev_state;
1250  this_thr->th.ompt_thread_info.wait_id = 0;
1251 #endif
1252 
1253  /* got lock, we were dequeued by the thread that released lock */
1254  return KMP_LOCK_ACQUIRED_FIRST;
1255  }
1256 
1257  /* Yield if number of threads > number of logical processors */
1258  /* ToDo: Not sure why this should only be in oversubscription case,
1259  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1260  KMP_YIELD_OVERSUB();
1261 
1262 #ifdef DEBUG_QUEUING_LOCKS
1263  TRACE_LOCK(gtid + 1, "acq retry");
1264 #endif
1265  }
1266  KMP_ASSERT2(0, "should not get here");
1267  return KMP_LOCK_ACQUIRED_FIRST;
1268 }
1269 
1270 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1271  KMP_DEBUG_ASSERT(gtid >= 0);
1272 
1273  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1274  return retval;
1275 }
1276 
1277 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1278  kmp_int32 gtid) {
1279  char const *const func = "omp_set_lock";
1280  if (lck->lk.initialized != lck) {
1281  KMP_FATAL(LockIsUninitialized, func);
1282  }
1283  if (__kmp_is_queuing_lock_nestable(lck)) {
1284  KMP_FATAL(LockNestableUsedAsSimple, func);
1285  }
1286  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1287  KMP_FATAL(LockIsAlreadyOwned, func);
1288  }
1289 
1290  __kmp_acquire_queuing_lock(lck, gtid);
1291 
1292  lck->lk.owner_id = gtid + 1;
1293  return KMP_LOCK_ACQUIRED_FIRST;
1294 }
1295 
1296 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1297  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1298  kmp_int32 head;
1299 #ifdef KMP_DEBUG
1300  kmp_info_t *this_thr;
1301 #endif
1302 
1303  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1304  KMP_DEBUG_ASSERT(gtid >= 0);
1305 #ifdef KMP_DEBUG
1306  this_thr = __kmp_thread_from_gtid(gtid);
1307  KMP_DEBUG_ASSERT(this_thr != NULL);
1308  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1309 #endif
1310 
1311  head = *head_id_p;
1312 
1313  if (head == 0) { /* nobody on queue, nobody holding */
1314  /* try (0,0)->(-1,0) */
1315  if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1316  KA_TRACE(1000,
1317  ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1318  KMP_FSYNC_ACQUIRED(lck);
1319  return TRUE;
1320  }
1321  }
1322 
1323  KA_TRACE(1000,
1324  ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1325  return FALSE;
1326 }
1327 
1328 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1329  kmp_int32 gtid) {
1330  char const *const func = "omp_test_lock";
1331  if (lck->lk.initialized != lck) {
1332  KMP_FATAL(LockIsUninitialized, func);
1333  }
1334  if (__kmp_is_queuing_lock_nestable(lck)) {
1335  KMP_FATAL(LockNestableUsedAsSimple, func);
1336  }
1337 
1338  int retval = __kmp_test_queuing_lock(lck, gtid);
1339 
1340  if (retval) {
1341  lck->lk.owner_id = gtid + 1;
1342  }
1343  return retval;
1344 }
1345 
1346 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1347  kmp_info_t *this_thr;
1348  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1349  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1350 
1351  KA_TRACE(1000,
1352  ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1353  KMP_DEBUG_ASSERT(gtid >= 0);
1354  this_thr = __kmp_thread_from_gtid(gtid);
1355  KMP_DEBUG_ASSERT(this_thr != NULL);
1356 #ifdef DEBUG_QUEUING_LOCKS
1357  TRACE_LOCK(gtid + 1, "rel ent");
1358 
1359  if (this_thr->th.th_spin_here)
1360  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1361  if (this_thr->th.th_next_waiting != 0)
1362  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1363 #endif
1364  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1365  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1366 
1367  KMP_FSYNC_RELEASING(lck);
1368 
1369  while (1) {
1370  kmp_int32 dequeued;
1371  kmp_int32 head;
1372  kmp_int32 tail;
1373 
1374  head = *head_id_p;
1375 
1376 #ifdef DEBUG_QUEUING_LOCKS
1377  tail = *tail_id_p;
1378  TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1379  if (head == 0)
1380  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1381 #endif
1382  KMP_DEBUG_ASSERT(head !=
1383  0); /* holding the lock, head must be -1 or queue head */
1384 
1385  if (head == -1) { /* nobody on queue */
1386  /* try (-1,0)->(0,0) */
1387  if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1388  KA_TRACE(
1389  1000,
1390  ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1391  lck, gtid));
1392 #ifdef DEBUG_QUEUING_LOCKS
1393  TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1394 #endif
1395 
1396 #if OMPT_SUPPORT
1397 /* nothing to do - no other thread is trying to shift blame */
1398 #endif
1399  return KMP_LOCK_RELEASED;
1400  }
1401  dequeued = FALSE;
1402  } else {
1403  KMP_MB();
1404  tail = *tail_id_p;
1405  if (head == tail) { /* only one thread on the queue */
1406 #ifdef DEBUG_QUEUING_LOCKS
1407  if (head <= 0)
1408  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1409 #endif
1410  KMP_DEBUG_ASSERT(head > 0);
1411 
1412  /* try (h,h)->(-1,0) */
1413  dequeued = KMP_COMPARE_AND_STORE_REL64(
1414  RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1415  KMP_PACK_64(-1, 0));
1416 #ifdef DEBUG_QUEUING_LOCKS
1417  TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1418 #endif
1419 
1420  } else {
1421  volatile kmp_int32 *waiting_id_p;
1422  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1423  KMP_DEBUG_ASSERT(head_thr != NULL);
1424  waiting_id_p = &head_thr->th.th_next_waiting;
1425 
1426 /* Does this require synchronous reads? */
1427 #ifdef DEBUG_QUEUING_LOCKS
1428  if (head <= 0 || tail <= 0)
1429  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1430 #endif
1431  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1432 
1433  /* try (h,t)->(h',t) or (t,t) */
1434  KMP_MB();
1435  /* make sure enqueuing thread has time to update next waiting thread
1436  * field */
1437  *head_id_p =
1438  KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1439 #ifdef DEBUG_QUEUING_LOCKS
1440  TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1441 #endif
1442  dequeued = TRUE;
1443  }
1444  }
1445 
1446  if (dequeued) {
1447  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1448  KMP_DEBUG_ASSERT(head_thr != NULL);
1449 
1450 /* Does this require synchronous reads? */
1451 #ifdef DEBUG_QUEUING_LOCKS
1452  if (head <= 0 || tail <= 0)
1453  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1454 #endif
1455  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1456 
1457  /* For clean code only. Thread not released until next statement prevents
1458  race with acquire code. */
1459  head_thr->th.th_next_waiting = 0;
1460 #ifdef DEBUG_QUEUING_LOCKS
1461  TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1462 #endif
1463 
1464  KMP_MB();
1465  /* reset spin value */
1466  head_thr->th.th_spin_here = FALSE;
1467 
1468  KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1469  "dequeuing\n",
1470  lck, gtid));
1471 #ifdef DEBUG_QUEUING_LOCKS
1472  TRACE_LOCK(gtid + 1, "rel exit 2");
1473 #endif
1474  return KMP_LOCK_RELEASED;
1475  }
1476  /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1477  threads */
1478 
1479 #ifdef DEBUG_QUEUING_LOCKS
1480  TRACE_LOCK(gtid + 1, "rel retry");
1481 #endif
1482 
1483  } /* while */
1484  KMP_ASSERT2(0, "should not get here");
1485  return KMP_LOCK_RELEASED;
1486 }
1487 
1488 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1489  kmp_int32 gtid) {
1490  char const *const func = "omp_unset_lock";
1491  KMP_MB(); /* in case another processor initialized lock */
1492  if (lck->lk.initialized != lck) {
1493  KMP_FATAL(LockIsUninitialized, func);
1494  }
1495  if (__kmp_is_queuing_lock_nestable(lck)) {
1496  KMP_FATAL(LockNestableUsedAsSimple, func);
1497  }
1498  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1499  KMP_FATAL(LockUnsettingFree, func);
1500  }
1501  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1502  KMP_FATAL(LockUnsettingSetByAnother, func);
1503  }
1504  lck->lk.owner_id = 0;
1505  return __kmp_release_queuing_lock(lck, gtid);
1506 }
1507 
1508 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1509  lck->lk.location = NULL;
1510  lck->lk.head_id = 0;
1511  lck->lk.tail_id = 0;
1512  lck->lk.next_ticket = 0;
1513  lck->lk.now_serving = 0;
1514  lck->lk.owner_id = 0; // no thread owns the lock.
1515  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1516  lck->lk.initialized = lck;
1517 
1518  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1519 }
1520 
1521 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1522  lck->lk.initialized = NULL;
1523  lck->lk.location = NULL;
1524  lck->lk.head_id = 0;
1525  lck->lk.tail_id = 0;
1526  lck->lk.next_ticket = 0;
1527  lck->lk.now_serving = 0;
1528  lck->lk.owner_id = 0;
1529  lck->lk.depth_locked = -1;
1530 }
1531 
1532 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1533  char const *const func = "omp_destroy_lock";
1534  if (lck->lk.initialized != lck) {
1535  KMP_FATAL(LockIsUninitialized, func);
1536  }
1537  if (__kmp_is_queuing_lock_nestable(lck)) {
1538  KMP_FATAL(LockNestableUsedAsSimple, func);
1539  }
1540  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1541  KMP_FATAL(LockStillOwned, func);
1542  }
1543  __kmp_destroy_queuing_lock(lck);
1544 }
1545 
1546 // nested queuing locks
1547 
1548 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1549  KMP_DEBUG_ASSERT(gtid >= 0);
1550 
1551  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1552  lck->lk.depth_locked += 1;
1553  return KMP_LOCK_ACQUIRED_NEXT;
1554  } else {
1555  __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1556  KMP_MB();
1557  lck->lk.depth_locked = 1;
1558  KMP_MB();
1559  lck->lk.owner_id = gtid + 1;
1560  return KMP_LOCK_ACQUIRED_FIRST;
1561  }
1562 }
1563 
1564 static int
1565 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1566  kmp_int32 gtid) {
1567  char const *const func = "omp_set_nest_lock";
1568  if (lck->lk.initialized != lck) {
1569  KMP_FATAL(LockIsUninitialized, func);
1570  }
1571  if (!__kmp_is_queuing_lock_nestable(lck)) {
1572  KMP_FATAL(LockSimpleUsedAsNestable, func);
1573  }
1574  return __kmp_acquire_nested_queuing_lock(lck, gtid);
1575 }
1576 
1577 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1578  int retval;
1579 
1580  KMP_DEBUG_ASSERT(gtid >= 0);
1581 
1582  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1583  retval = ++lck->lk.depth_locked;
1584  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1585  retval = 0;
1586  } else {
1587  KMP_MB();
1588  retval = lck->lk.depth_locked = 1;
1589  KMP_MB();
1590  lck->lk.owner_id = gtid + 1;
1591  }
1592  return retval;
1593 }
1594 
1595 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1596  kmp_int32 gtid) {
1597  char const *const func = "omp_test_nest_lock";
1598  if (lck->lk.initialized != lck) {
1599  KMP_FATAL(LockIsUninitialized, func);
1600  }
1601  if (!__kmp_is_queuing_lock_nestable(lck)) {
1602  KMP_FATAL(LockSimpleUsedAsNestable, func);
1603  }
1604  return __kmp_test_nested_queuing_lock(lck, gtid);
1605 }
1606 
1607 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1608  KMP_DEBUG_ASSERT(gtid >= 0);
1609 
1610  KMP_MB();
1611  if (--(lck->lk.depth_locked) == 0) {
1612  KMP_MB();
1613  lck->lk.owner_id = 0;
1614  __kmp_release_queuing_lock(lck, gtid);
1615  return KMP_LOCK_RELEASED;
1616  }
1617  return KMP_LOCK_STILL_HELD;
1618 }
1619 
1620 static int
1621 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1622  kmp_int32 gtid) {
1623  char const *const func = "omp_unset_nest_lock";
1624  KMP_MB(); /* in case another processor initialized lock */
1625  if (lck->lk.initialized != lck) {
1626  KMP_FATAL(LockIsUninitialized, func);
1627  }
1628  if (!__kmp_is_queuing_lock_nestable(lck)) {
1629  KMP_FATAL(LockSimpleUsedAsNestable, func);
1630  }
1631  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1632  KMP_FATAL(LockUnsettingFree, func);
1633  }
1634  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1635  KMP_FATAL(LockUnsettingSetByAnother, func);
1636  }
1637  return __kmp_release_nested_queuing_lock(lck, gtid);
1638 }
1639 
1640 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1641  __kmp_init_queuing_lock(lck);
1642  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1643 }
1644 
1645 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1646  __kmp_destroy_queuing_lock(lck);
1647  lck->lk.depth_locked = 0;
1648 }
1649 
1650 static void
1651 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1652  char const *const func = "omp_destroy_nest_lock";
1653  if (lck->lk.initialized != lck) {
1654  KMP_FATAL(LockIsUninitialized, func);
1655  }
1656  if (!__kmp_is_queuing_lock_nestable(lck)) {
1657  KMP_FATAL(LockSimpleUsedAsNestable, func);
1658  }
1659  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1660  KMP_FATAL(LockStillOwned, func);
1661  }
1662  __kmp_destroy_nested_queuing_lock(lck);
1663 }
1664 
1665 // access functions to fields which don't exist for all lock kinds.
1666 
1667 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1668  return lck->lk.location;
1669 }
1670 
1671 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1672  const ident_t *loc) {
1673  lck->lk.location = loc;
1674 }
1675 
1676 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1677  return lck->lk.flags;
1678 }
1679 
1680 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1681  kmp_lock_flags_t flags) {
1682  lck->lk.flags = flags;
1683 }
1684 
1685 #if KMP_USE_ADAPTIVE_LOCKS
1686 
1687 /* RTM Adaptive locks */
1688 
1689 #if KMP_HAVE_RTM_INTRINSICS
1690 #include <immintrin.h>
1691 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1692 
1693 #else
1694 
1695 // Values from the status register after failed speculation.
1696 #define _XBEGIN_STARTED (~0u)
1697 #define _XABORT_EXPLICIT (1 << 0)
1698 #define _XABORT_RETRY (1 << 1)
1699 #define _XABORT_CONFLICT (1 << 2)
1700 #define _XABORT_CAPACITY (1 << 3)
1701 #define _XABORT_DEBUG (1 << 4)
1702 #define _XABORT_NESTED (1 << 5)
1703 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1704 
1705 // Aborts for which it's worth trying again immediately
1706 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1707 
1708 #define STRINGIZE_INTERNAL(arg) #arg
1709 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1710 
1711 // Access to RTM instructions
/* A version of XBegin which returns -1 when speculation starts successfully,
   and the value of EAX on an abort. This is the same definition as the
   compiler intrinsic that will be supported at some point. */
1715 static __inline int _xbegin() {
1716  int res = -1;
1717 
1718 #if KMP_OS_WINDOWS
1719 #if KMP_ARCH_X86_64
1720  _asm {
1721  _emit 0xC7
1722  _emit 0xF8
1723  _emit 2
1724  _emit 0
1725  _emit 0
1726  _emit 0
1727  jmp L2
1728  mov res, eax
1729  L2:
1730  }
1731 #else /* IA32 */
1732  _asm {
1733  _emit 0xC7
1734  _emit 0xF8
1735  _emit 2
1736  _emit 0
1737  _emit 0
1738  _emit 0
1739  jmp L2
1740  mov res, eax
1741  L2:
1742  }
1743 #endif // KMP_ARCH_X86_64
1744 #else
1745  /* Note that %eax must be noted as killed (clobbered), because the XSR is
1746  returned in %eax(%rax) on abort. Other register values are restored, so
1747  don't need to be killed.
1748 
1749  We must also mark 'res' as an input and an output, since otherwise
1750  'res=-1' may be dropped as being dead, whereas we do need the assignment on
1751  the successful (i.e., non-abort) path. */
1752  __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
1753  " .long 1f-1b-6\n"
1754  " jmp 2f\n"
1755  "1: movl %%eax,%0\n"
1756  "2:"
1757  : "+r"(res)::"memory", "%eax");
1758 #endif // KMP_OS_WINDOWS
1759  return res;
1760 }
1761 
1762 /* Transaction end */
1763 static __inline void _xend() {
1764 #if KMP_OS_WINDOWS
1765  __asm {
1766  _emit 0x0f
1767  _emit 0x01
1768  _emit 0xd5
1769  }
1770 #else
1771  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1772 #endif
1773 }
1774 
1775 /* This is a macro; the argument must be a single-byte constant which can be
1776  evaluated by the inline assembler, since it is emitted as a byte into the
1777  assembly code. */
1778 // clang-format off
1779 #if KMP_OS_WINDOWS
1780 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1781 #else
1782 #define _xabort(ARG) \
1783  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1784 #endif
1785 // clang-format on
1786 #endif // KMP_HAVE_RTM_INTRINSICS
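/* Editor's note: the intrinsics above (real or emulated) are used in the
   canonical RTM pattern sketched below; the snippet is illustrative only and
   mirrors how the adaptive-lock code later in this file drives them. */
#if 0
unsigned status = _xbegin();
if (status == _XBEGIN_STARTED) {
  // Speculative region: memory accesses are buffered by the hardware.
  _xend(); // commit the transaction
} else {
  // Aborted: the status word says whether an immediate retry is worthwhile.
  if (status & SOFT_ABORT_MASK) {
    // transient abort (retry/conflict/explicit) - try speculating again
  } else {
    // hard abort (e.g. capacity) - fall back to the real lock
  }
}
#endif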
1787 
1788 // Statistics are collected for testing purposes
1789 #if KMP_DEBUG_ADAPTIVE_LOCKS
1790 
1791 // We accumulate speculative lock statistics when the lock is destroyed. We
1792 // keep locks that haven't been destroyed in the liveLocks list so that we can
1793 // grab their statistics too.
1794 static kmp_adaptive_lock_statistics_t destroyedStats;
1795 
1796 // To hold the list of live locks.
1797 static kmp_adaptive_lock_info_t liveLocks;
1798 
1799 // A lock so we can safely update the list of locks.
1800 static kmp_bootstrap_lock_t chain_lock =
1801  KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1802 
1803 // Initialize the list of stats.
1804 void __kmp_init_speculative_stats() {
1805  kmp_adaptive_lock_info_t *lck = &liveLocks;
1806 
1807  memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1808  sizeof(lck->stats));
1809  lck->stats.next = lck;
1810  lck->stats.prev = lck;
1811 
1812  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1813  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1814 
1815  __kmp_init_bootstrap_lock(&chain_lock);
1816 }
1817 
1818 // Insert the lock into the circular list
1819 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1820  __kmp_acquire_bootstrap_lock(&chain_lock);
1821 
1822  lck->stats.next = liveLocks.stats.next;
1823  lck->stats.prev = &liveLocks;
1824 
1825  liveLocks.stats.next = lck;
1826  lck->stats.next->stats.prev = lck;
1827 
1828  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1829  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1830 
1831  __kmp_release_bootstrap_lock(&chain_lock);
1832 }
1833 
1834 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1835  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1836  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1837 
1838  kmp_adaptive_lock_info_t *n = lck->stats.next;
1839  kmp_adaptive_lock_info_t *p = lck->stats.prev;
1840 
1841  n->stats.prev = p;
1842  p->stats.next = n;
1843 }
1844 
1845 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1846  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1847  sizeof(lck->stats));
1848  __kmp_remember_lock(lck);
1849 }
1850 
1851 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1852  kmp_adaptive_lock_info_t *lck) {
1853  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1854 
1855  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1856  t->successfulSpeculations += s->successfulSpeculations;
1857  t->hardFailedSpeculations += s->hardFailedSpeculations;
1858  t->softFailedSpeculations += s->softFailedSpeculations;
1859  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1860  t->lemmingYields += s->lemmingYields;
1861 }
1862 
1863 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1864  __kmp_acquire_bootstrap_lock(&chain_lock);
1865 
1866  __kmp_add_stats(&destroyedStats, lck);
1867  __kmp_forget_lock(lck);
1868 
1869  __kmp_release_bootstrap_lock(&chain_lock);
1870 }
1871 
1872 static float percent(kmp_uint32 count, kmp_uint32 total) {
1873  return (total == 0) ? 0.0 : (100.0 * count) / total;
1874 }
1875 
1876 void __kmp_print_speculative_stats() {
1877  kmp_adaptive_lock_statistics_t total = destroyedStats;
1878  kmp_adaptive_lock_info_t *lck;
1879 
1880  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1881  __kmp_add_stats(&total, lck);
1882  }
1883  kmp_adaptive_lock_statistics_t *t = &total;
1884  kmp_uint32 totalSections =
1885  t->nonSpeculativeAcquires + t->successfulSpeculations;
1886  kmp_uint32 totalSpeculations = t->successfulSpeculations +
1887  t->hardFailedSpeculations +
1888  t->softFailedSpeculations;
1889  if (totalSections <= 0)
1890  return;
1891 
1892  kmp_safe_raii_file_t statsFile;
1893  if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1894  statsFile.set_stdout();
1895  } else {
1896  size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1897  char buffer[buffLen];
1898  KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1899  (kmp_int32)getpid());
1900  statsFile.open(buffer, "w");
1901  }
1902 
1903  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1904  fprintf(statsFile,
1905  " Lock parameters: \n"
1906  " max_soft_retries : %10d\n"
1907  " max_badness : %10d\n",
1908  __kmp_adaptive_backoff_params.max_soft_retries,
1909  __kmp_adaptive_backoff_params.max_badness);
1910  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1911  t->nonSpeculativeAcquireAttempts);
1912  fprintf(statsFile, " Total critical sections : %10d\n",
1913  totalSections);
1914  fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1915  t->successfulSpeculations,
1916  percent(t->successfulSpeculations, totalSections));
1917  fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1918  t->nonSpeculativeAcquires,
1919  percent(t->nonSpeculativeAcquires, totalSections));
1920  fprintf(statsFile, " Lemming yields : %10d\n\n",
1921  t->lemmingYields);
1922 
1923  fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1924  totalSpeculations);
1925  fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
1926  t->successfulSpeculations,
1927  percent(t->successfulSpeculations, totalSpeculations));
1928  fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
1929  t->softFailedSpeculations,
1930  percent(t->softFailedSpeculations, totalSpeculations));
1931  fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
1932  t->hardFailedSpeculations,
1933  percent(t->hardFailedSpeculations, totalSpeculations));
1934 }
1935 
1936 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1937 #else
1938 #define KMP_INC_STAT(lck, stat)
1939 
1940 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
1941 
1942 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1943  // It is enough to check that the head_id is zero.
1944  // We don't also need to check the tail.
1945  bool res = lck->lk.head_id == 0;
1946 
1947 // We need a fence here, since we must ensure that no memory operations
1948 // from later in this thread float above that read.
1949 #if KMP_COMPILER_ICC
1950  _mm_mfence();
1951 #else
1952  __sync_synchronize();
1953 #endif
1954 
1955  return res;
1956 }
1957 
1958 // Functions for manipulating the badness
1959 static __inline void
1960 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1961  // Reset the badness to zero so we eagerly try to speculate again
1962  lck->lk.adaptive.badness = 0;
1963  KMP_INC_STAT(lck, successfulSpeculations);
1964 }
1965 
1966 // Create a bit mask with one more set bit.
1967 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1968  kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1969  if (newBadness > lck->lk.adaptive.max_badness) {
1970  return;
1971  } else {
1972  lck->lk.adaptive.badness = newBadness;
1973  }
1974 }
1975 
1976 // Check whether speculation should be attempted.
1977 KMP_ATTRIBUTE_TARGET_RTM
1978 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1979  kmp_int32 gtid) {
1980  kmp_uint32 badness = lck->lk.adaptive.badness;
1981  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1982  int res = (attempts & badness) == 0;
1983  return res;
1984 }
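/* Editor's note (worked example): badness is a low-bit mask, so with
   badness == 0 every acquisition attempt speculates. After three failed
   speculative rounds (three calls to __kmp_step_badness) badness == 0b111,
   and (attempts & badness) == 0 only when the low three bits of
   acquire_attempts are zero, i.e. speculation is retried roughly once every
   eight acquisitions until a success resets badness to zero. */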
1985 
1986 // Attempt to acquire only the speculative lock.
1987 // Does not back off to the non-speculative lock.
1988 KMP_ATTRIBUTE_TARGET_RTM
1989 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1990  kmp_int32 gtid) {
1991  int retries = lck->lk.adaptive.max_soft_retries;
1992 
1993  // We don't explicitly count the start of speculation; rather, we record the
1994  // results (success, hard fail, soft fail). The sum of all of those is the
1995  // total number of times we started speculation since all speculations must
1996  // end one of those ways.
1997  do {
1998  kmp_uint32 status = _xbegin();
1999  // Switch this in to disable actual speculation but exercise at least some
2000  // of the rest of the code. Useful for debugging...
2001  // kmp_uint32 status = _XABORT_NESTED;
2002 
2003  if (status == _XBEGIN_STARTED) {
2004  /* We have successfully started speculation. Check that no-one acquired
2005  the lock for real between when we last looked and now. This also gets
2006  the lock cache line into our read-set, which we need so that we'll
2007  abort if anyone later claims it for real. */
2008  if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2009  // Lock is now visibly acquired, so someone beat us to it. Abort the
2010  // transaction so we'll restart from _xbegin with the failure status.
2011  _xabort(0x01);
2012  KMP_ASSERT2(0, "should not get here");
2013  }
2014  return 1; // Lock has been acquired (speculatively)
2015  } else {
2016  // We have aborted, update the statistics
2017  if (status & SOFT_ABORT_MASK) {
2018  KMP_INC_STAT(lck, softFailedSpeculations);
2019  // and loop round to retry.
2020  } else {
2021  KMP_INC_STAT(lck, hardFailedSpeculations);
2022  // Give up if we had a hard failure.
2023  break;
2024  }
2025  }
2026  } while (retries--); // Loop while we have retries, and didn't fail hard.
2027 
2028  // Either we had a hard failure or we didn't succeed softly after
2029  // the full set of attempts, so back off the badness.
2030  __kmp_step_badness(lck);
2031  return 0;
2032 }
2033 
2034 // Attempt to acquire the speculative lock, or back off to the non-speculative
2035 // one if the speculative lock cannot be acquired.
2036 // We can succeed speculatively, non-speculatively, or fail.
2037 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2038  // First try to acquire the lock speculatively
2039  if (__kmp_should_speculate(lck, gtid) &&
2040  __kmp_test_adaptive_lock_only(lck, gtid))
2041  return 1;
2042 
2043  // Speculative acquisition failed, so try to acquire it non-speculatively.
2044  // Count the non-speculative acquire attempt
2045  lck->lk.adaptive.acquire_attempts++;
2046 
2047  // Use base, non-speculative lock.
2048  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2049  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2050  return 1; // Lock is acquired (non-speculatively)
2051  } else {
2052  return 0; // Failed to acquire the lock, it's already visibly locked.
2053  }
2054 }
2055 
2056 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2057  kmp_int32 gtid) {
2058  char const *const func = "omp_test_lock";
2059  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2060  KMP_FATAL(LockIsUninitialized, func);
2061  }
2062 
2063  int retval = __kmp_test_adaptive_lock(lck, gtid);
2064 
2065  if (retval) {
2066  lck->lk.qlk.owner_id = gtid + 1;
2067  }
2068  return retval;
2069 }
2070 
2071 // Block until we can acquire a speculative, adaptive lock. We check whether we
2072 // should be trying to speculate. If we should be, we check the real lock to see
2073 // if it is free, and, if not, pause without attempting to acquire it until it
2074 // is. Then we try the speculative acquire. This means that although we suffer
2075 // from lemmings a little (because we can't acquire the lock speculatively
2076 // until the queue of threads waiting has cleared), we don't get into a state
2077 // where we can never acquire the lock speculatively (because we force the queue
2078 // to clear by preventing new arrivals from entering the queue). This does mean
2079 // that when we're trying to break lemmings, the lock is no longer fair. However,
2080 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2081 // problem.
2082 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2083  kmp_int32 gtid) {
2084  if (__kmp_should_speculate(lck, gtid)) {
2085  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2086  if (__kmp_test_adaptive_lock_only(lck, gtid))
2087  return;
2088  // We tried speculation and failed, so give up.
2089  } else {
2090  // We can't try speculation until the lock is free, so we pause here
2091  // (without suspending on the queuing lock) to allow it to drain, then
2092  // try again. All other threads will also see the same result for
2093  // __kmp_should_speculate, so will be doing the same if they try to claim
2094  // the lock from now on.
2095  while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2096  KMP_INC_STAT(lck, lemmingYields);
2097  KMP_YIELD(TRUE);
2098  }
2099 
2100  if (__kmp_test_adaptive_lock_only(lck, gtid))
2101  return;
2102  }
2103  }
2104 
2105  // Speculative acquisition failed, so acquire it non-speculatively.
2106  // Count the non-speculative acquire attempt
2107  lck->lk.adaptive.acquire_attempts++;
2108 
2109  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2110  // We have acquired the base lock, so count that.
2111  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2112 }
2113 
2114 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2115  kmp_int32 gtid) {
2116  char const *const func = "omp_set_lock";
2117  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2118  KMP_FATAL(LockIsUninitialized, func);
2119  }
2120  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2121  KMP_FATAL(LockIsAlreadyOwned, func);
2122  }
2123 
2124  __kmp_acquire_adaptive_lock(lck, gtid);
2125 
2126  lck->lk.qlk.owner_id = gtid + 1;
2127 }
2128 
2129 KMP_ATTRIBUTE_TARGET_RTM
2130 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2131  kmp_int32 gtid) {
2132  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2133  lck))) { // If the lock doesn't look claimed we must be speculating.
2134  // (Or the user's code is buggy and they're releasing without locking;
2135  // if we had XTEST we'd be able to check that case...)
2136  _xend(); // Exit speculation
2137  __kmp_update_badness_after_success(lck);
2138  } else { // Since the lock *is* visibly locked we're not speculating,
2139  // so should use the underlying lock's release scheme.
2140  __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2141  }
2142  return KMP_LOCK_RELEASED;
2143 }
2144 
2145 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2146  kmp_int32 gtid) {
2147  char const *const func = "omp_unset_lock";
2148  KMP_MB(); /* in case another processor initialized lock */
2149  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2150  KMP_FATAL(LockIsUninitialized, func);
2151  }
2152  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2153  KMP_FATAL(LockUnsettingFree, func);
2154  }
2155  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2156  KMP_FATAL(LockUnsettingSetByAnother, func);
2157  }
2158  lck->lk.qlk.owner_id = 0;
2159  __kmp_release_adaptive_lock(lck, gtid);
2160  return KMP_LOCK_RELEASED;
2161 }
2162 
2163 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2164  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2165  lck->lk.adaptive.badness = 0;
2166  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2167  lck->lk.adaptive.max_soft_retries =
2168  __kmp_adaptive_backoff_params.max_soft_retries;
2169  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2170 #if KMP_DEBUG_ADAPTIVE_LOCKS
2171  __kmp_zero_speculative_stats(&lck->lk.adaptive);
2172 #endif
2173  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2174 }
2175 
2176 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2177 #if KMP_DEBUG_ADAPTIVE_LOCKS
2178  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2179 #endif
2180  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2181  // Nothing needed for the speculative part.
2182 }
2183 
2184 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2185  char const *const func = "omp_destroy_lock";
2186  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2187  KMP_FATAL(LockIsUninitialized, func);
2188  }
2189  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2190  KMP_FATAL(LockStillOwned, func);
2191  }
2192  __kmp_destroy_adaptive_lock(lck);
2193 }
2194 
2195 #endif // KMP_USE_ADAPTIVE_LOCKS
2196 
2197 /* ------------------------------------------------------------------------ */
2198 /* DRDPA ticket locks */
2199 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2200 
2201 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2202  return lck->lk.owner_id - 1;
2203 }
2204 
2205 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2206  return lck->lk.depth_locked != -1;
2207 }
2208 
2209 __forceinline static int
2210 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2211  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2212  kmp_uint64 mask = lck->lk.mask; // atomic load
2213  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2214 
2215 #ifdef USE_LOCK_PROFILE
2216  if (polls[ticket & mask] != ticket)
2217  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2218 /* else __kmp_printf( "." );*/
2219 #endif /* USE_LOCK_PROFILE */
2220 
2221  // Now spin-wait, but reload the polls pointer and mask, in case the
2222  // polling area has been reconfigured. Unless it is reconfigured, the
2223  // reloads stay in L1 cache and are cheap.
2224  //
2225  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2226  // The current implementation of KMP_WAIT doesn't allow for mask
2227  // and poll to be re-read every spin iteration.
2228  kmp_uint32 spins;
2229  KMP_FSYNC_PREPARE(lck);
2230  KMP_INIT_YIELD(spins);
2231  while (polls[ticket & mask] < ticket) { // atomic load
2232  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2233  // Re-read the mask and the poll pointer from the lock structure.
2234  //
2235  // Make certain that "mask" is read before "polls" !!!
2236  //
2237  // If another thread reconfigures the polling area and updates these
2238  // values, and we get the new value of mask and the old polls pointer, we
2239  // could access memory beyond the end of the old polling area.
2240  mask = lck->lk.mask; // atomic load
2241  polls = lck->lk.polls; // atomic load
2242  }
2243 
2244  // Critical section starts here
2245  KMP_FSYNC_ACQUIRED(lck);
2246  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2247  ticket, lck));
2248  lck->lk.now_serving = ticket; // non-volatile store
2249 
2250  // Deallocate a garbage polling area if we know that we are the last
2251  // thread that could possibly access it.
2252  //
2253  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2254  // ticket.
2255  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2256  __kmp_free(lck->lk.old_polls);
2257  lck->lk.old_polls = NULL;
2258  lck->lk.cleanup_ticket = 0;
2259  }
2260 
2261  // Check to see if we should reconfigure the polling area.
2262  // If there is still a garbage polling area to be deallocated from a
2263  // previous reconfiguration, let a later thread reconfigure it.
2264  if (lck->lk.old_polls == NULL) {
2265  bool reconfigure = false;
2266  std::atomic<kmp_uint64> *old_polls = polls;
2267  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2268 
2269  if (TCR_4(__kmp_nth) >
2270  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2271  // We are in oversubscription mode. Contract the polling area
2272  // down to a single location, if that hasn't been done already.
2273  if (num_polls > 1) {
2274  reconfigure = true;
2275  num_polls = TCR_4(lck->lk.num_polls);
2276  mask = 0;
2277  num_polls = 1;
2278  polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2279  sizeof(*polls));
2280  polls[0] = ticket;
2281  }
2282  } else {
2283  // We are in under/fully subscribed mode. Check the number of
2284  // threads waiting on the lock. The size of the polling area
2285  // should be at least the number of threads waiting.
2286  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2287  if (num_waiting > num_polls) {
2288  kmp_uint32 old_num_polls = num_polls;
2289  reconfigure = true;
2290  do {
2291  mask = (mask << 1) | 1;
2292  num_polls *= 2;
2293  } while (num_polls <= num_waiting);
2294 
2295  // Allocate the new polling area, and copy the relevant portion
2296  // of the old polling area to the new area. __kmp_allocate()
2297  // zeroes the memory it allocates, and most of the old area is
2298  // just zero padding, so we only copy the release counters.
2299  polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2300  sizeof(*polls));
2301  kmp_uint32 i;
2302  for (i = 0; i < old_num_polls; i++) {
2303  polls[i].store(old_polls[i]);
2304  }
2305  }
2306  }
2307 
2308  if (reconfigure) {
2309  // Now write the updated fields back to the lock structure.
2310  //
2311  // Make certain that "polls" is written before "mask" !!!
2312  //
2313  // If another thread picks up the new value of mask and the old polls
2314  // pointer, it could access memory beyond the end of the old polling
2315  // area.
2316  //
2317  // On x86, we need memory fences.
2318  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2319  "lock %p to %d polls\n",
2320  ticket, lck, num_polls));
2321 
2322  lck->lk.old_polls = old_polls;
2323  lck->lk.polls = polls; // atomic store
2324 
2325  KMP_MB();
2326 
2327  lck->lk.num_polls = num_polls;
2328  lck->lk.mask = mask; // atomic store
2329 
2330  KMP_MB();
2331 
2332  // Only after the new polling area and mask have been flushed
2333  // to main memory can we update the cleanup ticket field.
2334  //
2335  // volatile load / non-volatile store
2336  lck->lk.cleanup_ticket = lck->lk.next_ticket;
2337  }
2338  }
2339  return KMP_LOCK_ACQUIRED_FIRST;
2340 }
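/* Editor's sketch: the essential ticket/polling-area handshake implemented
   above, stripped of reconfiguration, cleanup and yielding. All names are
   hypothetical; a fixed, power-of-two polling area is assumed. */
#if 0
#include <atomic>
#include <cstdint>
struct mini_drdpa_lock {
  static const uint64_t NUM_POLLS = 8; // power of two, so (ticket & mask) works
  std::atomic<uint64_t> next_ticket;
  std::atomic<uint64_t> polls[NUM_POLLS];
  uint64_t now_serving;
  mini_drdpa_lock() : next_ticket(0), now_serving(0) {
    for (uint64_t i = 0; i < NUM_POLLS; ++i)
      polls[i].store(0, std::memory_order_relaxed); // slot 0 serves ticket 0
  }
  void acquire() {
    uint64_t ticket = next_ticket.fetch_add(1);
    // Each waiter spins on its own slot, so different waiters poll different
    // locations - the point of the distributed polling area.
    while (polls[ticket & (NUM_POLLS - 1)].load(std::memory_order_acquire) <
           ticket) {
    }
    now_serving = ticket;
  }
  void release() {
    uint64_t next = now_serving + 1;
    // Publish the successor's ticket into its slot; only that waiter proceeds.
    polls[next & (NUM_POLLS - 1)].store(next, std::memory_order_release);
  }
};
#endif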
2341 
2342 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2343  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2344  return retval;
2345 }
2346 
2347 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2348  kmp_int32 gtid) {
2349  char const *const func = "omp_set_lock";
2350  if (lck->lk.initialized != lck) {
2351  KMP_FATAL(LockIsUninitialized, func);
2352  }
2353  if (__kmp_is_drdpa_lock_nestable(lck)) {
2354  KMP_FATAL(LockNestableUsedAsSimple, func);
2355  }
2356  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2357  KMP_FATAL(LockIsAlreadyOwned, func);
2358  }
2359 
2360  __kmp_acquire_drdpa_lock(lck, gtid);
2361 
2362  lck->lk.owner_id = gtid + 1;
2363  return KMP_LOCK_ACQUIRED_FIRST;
2364 }
2365 
2366 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2367  // First get a ticket, then read the polls pointer and the mask.
2368  // The polls pointer must be read before the mask!!! (See above)
2369  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2370  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2371  kmp_uint64 mask = lck->lk.mask; // atomic load
2372  if (polls[ticket & mask] == ticket) {
2373  kmp_uint64 next_ticket = ticket + 1;
2374  if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2375  next_ticket)) {
2376  KMP_FSYNC_ACQUIRED(lck);
2377  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2378  ticket, lck));
2379  lck->lk.now_serving = ticket; // non-volatile store
2380 
2381  // Since no threads are waiting, there is no possibility that we would
2382  // want to reconfigure the polling area. We might have the cleanup ticket
2383  // value (which says that it is now safe to deallocate old_polls), but
2384  // we'll let a later thread which calls __kmp_acquire_lock do that - this
2385  // routine isn't supposed to block, and we would risk blocks if we called
2386  // __kmp_free() to do the deallocation.
2387  return TRUE;
2388  }
2389  }
2390  return FALSE;
2391 }
2392 
2393 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2394  kmp_int32 gtid) {
2395  char const *const func = "omp_test_lock";
2396  if (lck->lk.initialized != lck) {
2397  KMP_FATAL(LockIsUninitialized, func);
2398  }
2399  if (__kmp_is_drdpa_lock_nestable(lck)) {
2400  KMP_FATAL(LockNestableUsedAsSimple, func);
2401  }
2402 
2403  int retval = __kmp_test_drdpa_lock(lck, gtid);
2404 
2405  if (retval) {
2406  lck->lk.owner_id = gtid + 1;
2407  }
2408  return retval;
2409 }
2410 
2411 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2412  // Read the ticket value from the lock data struct, then the polls pointer and
2413  // the mask. The polls pointer must be read before the mask!!! (See above)
2414  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2415  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2416  kmp_uint64 mask = lck->lk.mask; // atomic load
2417  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2418  ticket - 1, lck));
2419  KMP_FSYNC_RELEASING(lck);
2420  polls[ticket & mask] = ticket; // atomic store
2421  return KMP_LOCK_RELEASED;
2422 }
2423 
2424 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2425  kmp_int32 gtid) {
2426  char const *const func = "omp_unset_lock";
2427  KMP_MB(); /* in case another processor initialized lock */
2428  if (lck->lk.initialized != lck) {
2429  KMP_FATAL(LockIsUninitialized, func);
2430  }
2431  if (__kmp_is_drdpa_lock_nestable(lck)) {
2432  KMP_FATAL(LockNestableUsedAsSimple, func);
2433  }
2434  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2435  KMP_FATAL(LockUnsettingFree, func);
2436  }
2437  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2438  (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2439  KMP_FATAL(LockUnsettingSetByAnother, func);
2440  }
2441  lck->lk.owner_id = 0;
2442  return __kmp_release_drdpa_lock(lck, gtid);
2443 }
2444 
2445 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2446  lck->lk.location = NULL;
2447  lck->lk.mask = 0;
2448  lck->lk.num_polls = 1;
2449  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2450  lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2451  lck->lk.cleanup_ticket = 0;
2452  lck->lk.old_polls = NULL;
2453  lck->lk.next_ticket = 0;
2454  lck->lk.now_serving = 0;
2455  lck->lk.owner_id = 0; // no thread owns the lock.
2456  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2457  lck->lk.initialized = lck;
2458 
2459  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2460 }
2461 
2462 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2463  lck->lk.initialized = NULL;
2464  lck->lk.location = NULL;
2465  if (lck->lk.polls.load() != NULL) {
2466  __kmp_free(lck->lk.polls.load());
2467  lck->lk.polls = NULL;
2468  }
2469  if (lck->lk.old_polls != NULL) {
2470  __kmp_free(lck->lk.old_polls);
2471  lck->lk.old_polls = NULL;
2472  }
2473  lck->lk.mask = 0;
2474  lck->lk.num_polls = 0;
2475  lck->lk.cleanup_ticket = 0;
2476  lck->lk.next_ticket = 0;
2477  lck->lk.now_serving = 0;
2478  lck->lk.owner_id = 0;
2479  lck->lk.depth_locked = -1;
2480 }
2481 
2482 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2483  char const *const func = "omp_destroy_lock";
2484  if (lck->lk.initialized != lck) {
2485  KMP_FATAL(LockIsUninitialized, func);
2486  }
2487  if (__kmp_is_drdpa_lock_nestable(lck)) {
2488  KMP_FATAL(LockNestableUsedAsSimple, func);
2489  }
2490  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2491  KMP_FATAL(LockStillOwned, func);
2492  }
2493  __kmp_destroy_drdpa_lock(lck);
2494 }
2495 
2496 // nested drdpa ticket locks
2497 
2498 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2499  KMP_DEBUG_ASSERT(gtid >= 0);
2500 
2501  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2502  lck->lk.depth_locked += 1;
2503  return KMP_LOCK_ACQUIRED_NEXT;
2504  } else {
2505  __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2506  KMP_MB();
2507  lck->lk.depth_locked = 1;
2508  KMP_MB();
2509  lck->lk.owner_id = gtid + 1;
2510  return KMP_LOCK_ACQUIRED_FIRST;
2511  }
2512 }
2513 
2514 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2515  kmp_int32 gtid) {
2516  char const *const func = "omp_set_nest_lock";
2517  if (lck->lk.initialized != lck) {
2518  KMP_FATAL(LockIsUninitialized, func);
2519  }
2520  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2521  KMP_FATAL(LockSimpleUsedAsNestable, func);
2522  }
2523  __kmp_acquire_nested_drdpa_lock(lck, gtid);
2524 }
2525 
2526 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2527  int retval;
2528 
2529  KMP_DEBUG_ASSERT(gtid >= 0);
2530 
2531  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2532  retval = ++lck->lk.depth_locked;
2533  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2534  retval = 0;
2535  } else {
2536  KMP_MB();
2537  retval = lck->lk.depth_locked = 1;
2538  KMP_MB();
2539  lck->lk.owner_id = gtid + 1;
2540  }
2541  return retval;
2542 }
2543 
2544 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2545  kmp_int32 gtid) {
2546  char const *const func = "omp_test_nest_lock";
2547  if (lck->lk.initialized != lck) {
2548  KMP_FATAL(LockIsUninitialized, func);
2549  }
2550  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2551  KMP_FATAL(LockSimpleUsedAsNestable, func);
2552  }
2553  return __kmp_test_nested_drdpa_lock(lck, gtid);
2554 }
2555 
2556 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2557  KMP_DEBUG_ASSERT(gtid >= 0);
2558 
2559  KMP_MB();
2560  if (--(lck->lk.depth_locked) == 0) {
2561  KMP_MB();
2562  lck->lk.owner_id = 0;
2563  __kmp_release_drdpa_lock(lck, gtid);
2564  return KMP_LOCK_RELEASED;
2565  }
2566  return KMP_LOCK_STILL_HELD;
2567 }
2568 
2569 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2570  kmp_int32 gtid) {
2571  char const *const func = "omp_unset_nest_lock";
2572  KMP_MB(); /* in case another processor initialized lock */
2573  if (lck->lk.initialized != lck) {
2574  KMP_FATAL(LockIsUninitialized, func);
2575  }
2576  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2577  KMP_FATAL(LockSimpleUsedAsNestable, func);
2578  }
2579  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2580  KMP_FATAL(LockUnsettingFree, func);
2581  }
2582  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2583  KMP_FATAL(LockUnsettingSetByAnother, func);
2584  }
2585  return __kmp_release_nested_drdpa_lock(lck, gtid);
2586 }
2587 
2588 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2589  __kmp_init_drdpa_lock(lck);
2590  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2591 }
2592 
2593 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2594  __kmp_destroy_drdpa_lock(lck);
2595  lck->lk.depth_locked = 0;
2596 }
2597 
2598 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2599  char const *const func = "omp_destroy_nest_lock";
2600  if (lck->lk.initialized != lck) {
2601  KMP_FATAL(LockIsUninitialized, func);
2602  }
2603  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2604  KMP_FATAL(LockSimpleUsedAsNestable, func);
2605  }
2606  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2607  KMP_FATAL(LockStillOwned, func);
2608  }
2609  __kmp_destroy_nested_drdpa_lock(lck);
2610 }
2611 
2612 // access functions to fields which don't exist for all lock kinds.
2613 
2614 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2615  return lck->lk.location;
2616 }
2617 
2618 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2619  const ident_t *loc) {
2620  lck->lk.location = loc;
2621 }
2622 
2623 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2624  return lck->lk.flags;
2625 }
2626 
2627 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2628  kmp_lock_flags_t flags) {
2629  lck->lk.flags = flags;
2630 }
2631 
2632 // Time stamp counter
2633 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2634 #define __kmp_tsc() __kmp_hardware_timestamp()
2635 // Runtime's default backoff parameters
2636 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2637 #else
2638 // Use nanoseconds for other platforms
2639 extern kmp_uint64 __kmp_now_nsec();
2640 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2641 #define __kmp_tsc() __kmp_now_nsec()
2642 #endif
2643 
2644 // A useful predicate for dealing with timestamps that may wrap.
2645 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2646 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2647 // Times where going clockwise is less distance than going anti-clockwise
2648 // are in the future, others are in the past. E.g. with a = MAX-1 and
2649 // b = MAX+1 (= 0), a > b as unsigned does not mean a has reached b, whereas
2650 // the signed view (signed(a) = -2, signed(b) = 0) captures the actual difference.
2651 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2652  return ((kmp_int64)b - (kmp_int64)a) > 0;
2653 }
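/* Editor's note (worked evaluation): before(MAX - 1, 0) computes
   (kmp_int64)0 - (kmp_int64)(MAX - 1) == 0 - (-2) == 2 > 0, hence true: the
   time 0 lies two ticks past the wrap, "after" MAX - 1. Conversely,
   before(0, MAX - 1) yields -2 > 0, i.e. false. */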
2654 
2655 // Truncated binary exponential backoff function
2656 void __kmp_spin_backoff(kmp_backoff_t *boff) {
2657  // We could flatten this loop, but making it a nested loop gives better results
2658  kmp_uint32 i;
2659  for (i = boff->step; i > 0; i--) {
2660  kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2661  do {
2662  KMP_CPU_PAUSE();
2663  } while (before(__kmp_tsc(), goal));
2664  }
2665  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2666 }
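/* Editor's note (worked example): successive calls busy-wait for
   step = 1, 3, 7, 15, ... outer iterations, each inner loop burning at least
   min_tick timestamp units; because max_backoff is a power of two, the update
   (step << 1 | 1) & (max_backoff - 1) saturates at max_backoff - 1, so the
   backoff grows geometrically and then stays bounded. */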
2667 
2668 #if KMP_USE_DYNAMIC_LOCK
2669 
2670 // Direct lock initializer. It simply writes a tag to the low 8 bits of the
2671 // lock word.
2672 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2673  kmp_dyna_lockseq_t seq) {
2674  TCW_4(*lck, KMP_GET_D_TAG(seq));
2675  KA_TRACE(
2676  20,
2677  ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2678 }
2679 
2680 #if KMP_USE_TSX
2681 
2682 // HLE lock functions - imported from the testbed runtime.
2683 #define HLE_ACQUIRE ".byte 0xf2;"
2684 #define HLE_RELEASE ".byte 0xf3;"
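// Editor's note: 0xf2 and 0xf3 are the XACQUIRE and XRELEASE prefixes (the
// legacy REPNE/REP encodings); they turn the following xchg/mov into
// hardware-lock-elision hints on TSX-capable parts and are treated as
// harmless redundant prefixes on older hardware.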
2685 
2686 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2687  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2688  return v;
2689 }
2690 
2691 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2692 
2693 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2694  TCW_4(*lck, 0);
2695 }
2696 
2697 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2698  // Use gtid for KMP_LOCK_BUSY if necessary
2699  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2700  int delay = 1;
2701  do {
2702  while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2703  for (int i = delay; i != 0; --i)
2704  KMP_CPU_PAUSE();
2705  delay = ((delay << 1) | 1) & 7;
2706  }
2707  } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2708  }
2709 }
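// Editor's note: the loop above is a test-and-test-and-set with a small
// bounded backoff: delay evolves 1, 3, 7 and then stays at 7 because of the
// "& 7" mask, so a waiter never pauses more than 7 times between re-reads of
// the lock word.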
2710 
2711 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2712  kmp_int32 gtid) {
2713  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2714 }
2715 
2716 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2717  __asm__ volatile(HLE_RELEASE "movl %1,%0"
2718  : "=m"(*lck)
2719  : "r"(KMP_LOCK_FREE(hle))
2720  : "memory");
2721  return KMP_LOCK_RELEASED;
2722 }
2723 
2724 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2725  kmp_int32 gtid) {
2726  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2727 }
2728 
2729 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2730  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2731 }
2732 
2733 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2734  kmp_int32 gtid) {
2735  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2736 }
2737 
2738 static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2739  __kmp_init_queuing_lock(lck);
2740 }
2741 
2742 static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2743  __kmp_destroy_queuing_lock(lck);
2744 }
2745 
2746 static void
2747 __kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2748  __kmp_destroy_queuing_lock_with_checks(lck);
2749 }
2750 
2751 KMP_ATTRIBUTE_TARGET_RTM
2752 static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2753  kmp_int32 gtid) {
2754  unsigned retries = 3, status;
2755  do {
2756  status = _xbegin();
2757  if (status == _XBEGIN_STARTED) {
2758  if (__kmp_is_unlocked_queuing_lock(lck))
2759  return;
2760  _xabort(0xff);
2761  }
2762  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2763  // Wait until lock becomes free
2764  while (!__kmp_is_unlocked_queuing_lock(lck)) {
2765  KMP_YIELD(TRUE);
2766  }
2767  } else if (!(status & _XABORT_RETRY))
2768  break;
2769  } while (retries--);
2770 
2771  // Fall back to the non-speculative queuing lock
2772  __kmp_acquire_queuing_lock(lck, gtid);
2773 }
2774 
2775 static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2776  kmp_int32 gtid) {
2777  __kmp_acquire_rtm_queuing_lock(lck, gtid);
2778 }
2779 
2780 KMP_ATTRIBUTE_TARGET_RTM
2781 static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2782  kmp_int32 gtid) {
2783  if (__kmp_is_unlocked_queuing_lock(lck)) {
2784  // Releasing from speculation
2785  _xend();
2786  } else {
2787  // Releasing from a real lock
2788  __kmp_release_queuing_lock(lck, gtid);
2789  }
2790  return KMP_LOCK_RELEASED;
2791 }
2792 
2793 static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2794  kmp_int32 gtid) {
2795  return __kmp_release_rtm_queuing_lock(lck, gtid);
2796 }
2797 
2798 KMP_ATTRIBUTE_TARGET_RTM
2799 static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2800  kmp_int32 gtid) {
2801  unsigned retries = 3, status;
2802  do {
2803  status = _xbegin();
2804  if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2805  return 1;
2806  }
2807  if (!(status & _XABORT_RETRY))
2808  break;
2809  } while (retries--);
2810 
2811  return __kmp_test_queuing_lock(lck, gtid);
2812 }
2813 
2814 static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2815  kmp_int32 gtid) {
2816  return __kmp_test_rtm_queuing_lock(lck, gtid);
2817 }
2818 
2819 // Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin lock.
2820 typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2821 
2822 static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2823  KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2824 }
2825 
2826 static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2827  __kmp_destroy_rtm_spin_lock(lck);
2828 }
2829 
2830 KMP_ATTRIBUTE_TARGET_RTM
2831 static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2832  kmp_int32 gtid) {
2833  unsigned retries = 3, status;
2834  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2835  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2836  do {
2837  status = _xbegin();
2838  if (status == _XBEGIN_STARTED) {
2839  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2840  return KMP_LOCK_ACQUIRED_FIRST;
2841  _xabort(0xff);
2842  }
2843  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2844  // Wait until lock becomes free
2845  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2846  KMP_YIELD(TRUE);
2847  }
2848  } else if (!(status & _XABORT_RETRY))
2849  break;
2850  } while (retries--);
2851 
2852  // Fall-back spin lock
2853  KMP_FSYNC_PREPARE(lck);
2854  kmp_backoff_t backoff = __kmp_spin_backoff_params;
2855  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2856  !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2857  __kmp_spin_backoff(&backoff);
2858  }
2859  KMP_FSYNC_ACQUIRED(lck);
2860  return KMP_LOCK_ACQUIRED_FIRST;
2861 }
2862 
2863 static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2864  kmp_int32 gtid) {
2865  return __kmp_acquire_rtm_spin_lock(lck, gtid);
2866 }
2867 
2868 KMP_ATTRIBUTE_TARGET_RTM
2869 static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2870  kmp_int32 gtid) {
2871  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2872  // Releasing from speculation
2873  _xend();
2874  } else {
2875  // Releasing from a real lock
2876  KMP_FSYNC_RELEASING(lck);
2877  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2878  }
2879  return KMP_LOCK_RELEASED;
2880 }
2881 
2882 static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2883  kmp_int32 gtid) {
2884  return __kmp_release_rtm_spin_lock(lck, gtid);
2885 }
2886 
2887 KMP_ATTRIBUTE_TARGET_RTM
2888 static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2889  unsigned retries = 3, status;
2890  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2891  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2892  do {
2893  status = _xbegin();
2894  if (status == _XBEGIN_STARTED &&
2895  KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2896  return TRUE;
2897  }
2898  if (!(status & _XABORT_RETRY))
2899  break;
2900  } while (retries--);
2901 
2902  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2903  __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2904  KMP_FSYNC_ACQUIRED(lck);
2905  return TRUE;
2906  }
2907  return FALSE;
2908 }
2909 
2910 static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2911  kmp_int32 gtid) {
2912  return __kmp_test_rtm_spin_lock(lck, gtid);
2913 }
2914 
2915 #endif // KMP_USE_TSX
2916 
2917 // Entry functions for indirect locks (first element of direct lock jump tables)
2918 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2919  kmp_dyna_lockseq_t tag);
2920 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2921 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2922 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2923 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2924 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2925  kmp_int32);
2926 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2927  kmp_int32);
2928 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2929  kmp_int32);
2930 
2931 // Lock function definitions for the union parameter type
2932 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2933 
2934 #define expand1(lk, op) \
2935  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2936  __kmp_##op##_##lk##_##lock(&lock->lk); \
2937  }
2938 #define expand2(lk, op) \
2939  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2940  kmp_int32 gtid) { \
2941  return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2942  }
2943 #define expand3(lk, op) \
2944  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2945  kmp_lock_flags_t flags) { \
2946  __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2947  }
2948 #define expand4(lk, op) \
2949  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2950  const ident_t *loc) { \
2951  __kmp_set_##lk##_lock_location(&lock->lk, loc); \
2952  }
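/* Editor's note: for illustration, expand2(queuing, acquire) generates
   (roughly) the thin forwarding wrapper below, which overloads the typed
   routine on the union parameter type:

     static int __kmp_acquire_queuing_lock(kmp_user_lock_p lock,
                                           kmp_int32 gtid) {
       return __kmp_acquire_queuing_lock(&lock->queuing, gtid);
     }
*/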
2953 
2954 KMP_FOREACH_LOCK_KIND(expand1, init)
2955 KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2956 KMP_FOREACH_LOCK_KIND(expand1, destroy)
2957 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2958 KMP_FOREACH_LOCK_KIND(expand2, acquire)
2959 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2960 KMP_FOREACH_LOCK_KIND(expand2, release)
2961 KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2962 KMP_FOREACH_LOCK_KIND(expand2, test)
2963 KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2964 KMP_FOREACH_LOCK_KIND(expand3, )
2965 KMP_FOREACH_LOCK_KIND(expand4, )
2966 
2967 #undef expand1
2968 #undef expand2
2969 #undef expand3
2970 #undef expand4
2971 
2972 // Jump tables for the direct lock functions
2973 // Only fill in the odd entries; that avoids the need to shift out the low bit
2974 
2975 // init functions
2976 #define expand(l, op) 0, __kmp_init_direct_lock,
2977 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2978  __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2979 #undef expand
2980 
2981 // destroy functions
2982 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2983 static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2984  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2985 #undef expand
2986 #define expand(l, op) \
2987  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2988 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
2989  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2990 #undef expand
2991 
2992 // set/acquire functions
2993 #define expand(l, op) \
2994  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2995 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2996  __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2997 #undef expand
2998 #define expand(l, op) \
2999  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3000 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3001  __kmp_set_indirect_lock_with_checks, 0,
3002  KMP_FOREACH_D_LOCK(expand, acquire)};
3003 #undef expand
3004 
3005 // unset/release and test functions
3006 #define expand(l, op) \
3007  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3008 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3009  __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3010 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3011  __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3012 #undef expand
3013 #define expand(l, op) \
3014  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3015 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3016  __kmp_unset_indirect_lock_with_checks, 0,
3017  KMP_FOREACH_D_LOCK(expand, release)};
3018 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3019  __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3020 #undef expand
3021 
3022 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3023 void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3024 int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3025 int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3026 int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3027 
3028 // Jump tables for the indirect lock functions
3029 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3030 void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3031  KMP_FOREACH_I_LOCK(expand, init)};
3032 #undef expand
3033 
3034 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3035 static void (*indirect_destroy[])(kmp_user_lock_p) = {
3036  KMP_FOREACH_I_LOCK(expand, destroy)};
3037 #undef expand
3038 #define expand(l, op) \
3039  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3040 static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3041  KMP_FOREACH_I_LOCK(expand, destroy)};
3042 #undef expand
3043 
3044 // set/acquire functions
3045 #define expand(l, op) \
3046  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3047 static int (*indirect_set[])(kmp_user_lock_p,
3048  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3049 #undef expand
3050 #define expand(l, op) \
3051  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3052 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3053  KMP_FOREACH_I_LOCK(expand, acquire)};
3054 #undef expand
3055 
3056 // unset/release and test functions
3057 #define expand(l, op) \
3058  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3059 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3060  KMP_FOREACH_I_LOCK(expand, release)};
3061 static int (*indirect_test[])(kmp_user_lock_p,
3062  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3063 #undef expand
3064 #define expand(l, op) \
3065  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3066 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3067  KMP_FOREACH_I_LOCK(expand, release)};
3068 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3069  KMP_FOREACH_I_LOCK(expand, test)};
3070 #undef expand
3071 
3072 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3073 void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3074 int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3075 int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3076 int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3077 
3078 // Lock index table.
3079 kmp_indirect_lock_table_t __kmp_i_lock_table;
3080 
3081 // Size of indirect locks.
3082 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3083 
3084 // Jump tables for lock accessor/modifier.
3085 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3086  const ident_t *) = {0};
3087 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3088  kmp_lock_flags_t) = {0};
3089 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3090  kmp_user_lock_p) = {0};
3091 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3092  kmp_user_lock_p) = {0};
3093 
3094 // Use different lock pools for different lock types.
3095 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3096 
3097 // User lock allocator for dynamically dispatched indirect locks. Every entry of
3098 // the indirect lock table holds the address and type of the allocated indirect
3099 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3100 // full. A destroyed indirect lock object is returned to the reusable pool of
3101 // locks, unique to each lock type.
3102 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3103  kmp_int32 gtid,
3104  kmp_indirect_locktag_t tag) {
3105  kmp_indirect_lock_t *lck;
3106  kmp_lock_index_t idx;
3107 
3108  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3109 
3110  if (__kmp_indirect_lock_pool[tag] != NULL) {
3111  // Reuse the allocated and destroyed lock object
3112  lck = __kmp_indirect_lock_pool[tag];
3113  if (OMP_LOCK_T_SIZE < sizeof(void *))
3114  idx = lck->lock->pool.index;
3115  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3116  KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3117  lck));
3118  } else {
3119  idx = __kmp_i_lock_table.next;
3120  // Check capacity and double the size if it is full
3121  if (idx == __kmp_i_lock_table.size) {
3122  // Double up the space for block pointers
3123  int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
3124  kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
3125  2 * row * sizeof(kmp_indirect_lock_t *));
3126  KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
3127  row * sizeof(kmp_indirect_lock_t *));
3128  kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3129  __kmp_i_lock_table.table = new_table;
3130  __kmp_free(old_table);
3131  // Allocate new objects in the new blocks
3132  for (int i = row; i < 2 * row; ++i)
3133  *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
3134  KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3135  __kmp_i_lock_table.size = 2 * idx;
3136  }
3137  __kmp_i_lock_table.next++;
3138  lck = KMP_GET_I_LOCK(idx);
3139  // Allocate a new base lock object
3140  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3141  KA_TRACE(20,
3142  ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3143  }
3144 
3145  __kmp_release_lock(&__kmp_global_lock, gtid);
3146 
3147  lck->type = tag;
3148 
3149  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3150  *((kmp_lock_index_t *)user_lock) = idx
3151  << 1; // indirect lock word must be even
3152  } else {
3153  *((kmp_indirect_lock_t **)user_lock) = lck;
3154  }
3155 
3156  return lck;
3157 }
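// Editor's note: together with __kmp_init_direct_lock above, this gives the
// dynamic dispatch its encoding: a direct lock stores an odd tag in the lock
// word, while an indirect lock (when the index fits, i.e.
// OMP_LOCK_T_SIZE < sizeof(void *)) stores its table index shifted left by
// one, so the low bit alone tells the two cases apart.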
3158 
3159 // User lock lookup for dynamically dispatched locks.
3160 static __forceinline kmp_indirect_lock_t *
3161 __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3162  if (__kmp_env_consistency_check) {
3163  kmp_indirect_lock_t *lck = NULL;
3164  if (user_lock == NULL) {
3165  KMP_FATAL(LockIsUninitialized, func);
3166  }
3167  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3168  kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3169  if (idx >= __kmp_i_lock_table.size) {
3170  KMP_FATAL(LockIsUninitialized, func);
3171  }
3172  lck = KMP_GET_I_LOCK(idx);
3173  } else {
3174  lck = *((kmp_indirect_lock_t **)user_lock);
3175  }
3176  if (lck == NULL) {
3177  KMP_FATAL(LockIsUninitialized, func);
3178  }
3179  return lck;
3180  } else {
3181  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3182  return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3183  } else {
3184  return *((kmp_indirect_lock_t **)user_lock);
3185  }
3186  }
3187 }
3188 
3189 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3190  kmp_dyna_lockseq_t seq) {
3191 #if KMP_USE_ADAPTIVE_LOCKS
3192  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3193  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3194  seq = lockseq_queuing;
3195  }
3196 #endif
3197 #if KMP_USE_TSX
3198  if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.rtm) {
3199  seq = lockseq_queuing;
3200  }
3201 #endif
3202  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3203  kmp_indirect_lock_t *l =
3204  __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3205  KMP_I_LOCK_FUNC(l, init)(l->lock);
3206  KA_TRACE(
3207  20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3208  seq));
3209 }
3210 
3211 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3212  kmp_uint32 gtid = __kmp_entry_gtid();
3213  kmp_indirect_lock_t *l =
3214  __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3215  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3216  kmp_indirect_locktag_t tag = l->type;
3217 
3218  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3219 
3220  // Use the base lock's space to keep the pool chain.
3221  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3222  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3223  l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3224  }
3225  __kmp_indirect_lock_pool[tag] = l;
3226 
3227  __kmp_release_lock(&__kmp_global_lock, gtid);
3228 }
3229 
3230 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3231  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3232  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3233 }
3234 
3235 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3236  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3237  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3238 }
3239 
3240 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3241  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3242  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3243 }
3244 
3245 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3246  kmp_int32 gtid) {
3247  kmp_indirect_lock_t *l =
3248  __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3249  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3250 }
3251 
3252 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3253  kmp_int32 gtid) {
3254  kmp_indirect_lock_t *l =
3255  __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3256  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3257 }
3258 
3259 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3260  kmp_int32 gtid) {
3261  kmp_indirect_lock_t *l =
3262  __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3263  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3264 }
3265 
3266 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3267 
3268 // This is used only in kmp_error.cpp when consistency checking is on.
3269 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3270  switch (seq) {
3271  case lockseq_tas:
3272  case lockseq_nested_tas:
3273  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3274 #if KMP_USE_FUTEX
3275  case lockseq_futex:
3276  case lockseq_nested_futex:
3277  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3278 #endif
3279  case lockseq_ticket:
3280  case lockseq_nested_ticket:
3281  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3282  case lockseq_queuing:
3283  case lockseq_nested_queuing:
3284 #if KMP_USE_ADAPTIVE_LOCKS
3285  case lockseq_adaptive:
3286 #endif
3287  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3288  case lockseq_drdpa:
3289  case lockseq_nested_drdpa:
3290  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3291  default:
3292  return 0;
3293  }
3294 }
3295 
3296 // Initializes data for dynamic user locks.
3297 void __kmp_init_dynamic_user_locks() {
3298  // Initialize jump table for the lock functions
3299  if (__kmp_env_consistency_check) {
3300  __kmp_direct_set = direct_set_check;
3301  __kmp_direct_unset = direct_unset_check;
3302  __kmp_direct_test = direct_test_check;
3303  __kmp_direct_destroy = direct_destroy_check;
3304  __kmp_indirect_set = indirect_set_check;
3305  __kmp_indirect_unset = indirect_unset_check;
3306  __kmp_indirect_test = indirect_test_check;
3307  __kmp_indirect_destroy = indirect_destroy_check;
3308  } else {
3309  __kmp_direct_set = direct_set;
3310  __kmp_direct_unset = direct_unset;
3311  __kmp_direct_test = direct_test;
3312  __kmp_direct_destroy = direct_destroy;
3313  __kmp_indirect_set = indirect_set;
3314  __kmp_indirect_unset = indirect_unset;
3315  __kmp_indirect_test = indirect_test;
3316  __kmp_indirect_destroy = indirect_destroy;
3317  }
3318  // If the user locks have already been initialized, then return. Allow the
3319  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3320  // new lock tables if they have already been allocated.
3321  if (__kmp_init_user_locks)
3322  return;
3323 
3324  // Initialize lock index table
3325  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3326  __kmp_i_lock_table.table =
3327  (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3328  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3329  KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3330  __kmp_i_lock_table.next = 0;
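  // Illustrative sketch: the index table is an array of row pointers, each row
  // holding KMP_I_LOCK_CHUNK lock objects; __kmp_allocate_indirect_lock doubles
  // the number of rows on demand. KMP_GET_I_LOCK is expected to split an index
  // into (row, offset) roughly as in this hypothetical expansion:
  //
  //   #define KMP_GET_I_LOCK(idx) \
  //     (*(__kmp_i_lock_table.table + (idx) / KMP_I_LOCK_CHUNK) + \
  //      (idx) % KMP_I_LOCK_CHUNK)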
3331 
3332  // Indirect lock size
3333  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3334  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3335 #if KMP_USE_ADAPTIVE_LOCKS
3336  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3337 #endif
3338  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3339 #if KMP_USE_TSX
3340  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3341 #endif
3342  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3343 #if KMP_USE_FUTEX
3344  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3345 #endif
3346  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3347  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3348  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3349 
3350 // Initialize lock accessor/modifier
3351 #define fill_jumps(table, expand, sep) \
3352  { \
3353  table[locktag##sep##ticket] = expand(ticket); \
3354  table[locktag##sep##queuing] = expand(queuing); \
3355  table[locktag##sep##drdpa] = expand(drdpa); \
3356  }
3357 
3358 #if KMP_USE_ADAPTIVE_LOCKS
3359 #define fill_table(table, expand) \
3360  { \
3361  fill_jumps(table, expand, _); \
3362  table[locktag_adaptive] = expand(queuing); \
3363  fill_jumps(table, expand, _nested_); \
3364  }
3365 #else
3366 #define fill_table(table, expand) \
3367  { \
3368  fill_jumps(table, expand, _); \
3369  fill_jumps(table, expand, _nested_); \
3370  }
3371 #endif // KMP_USE_ADAPTIVE_LOCKS
3372 
3373 #define expand(l) \
3374  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3375  fill_table(__kmp_indirect_set_location, expand);
3376 #undef expand
3377 #define expand(l) \
3378  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3379  fill_table(__kmp_indirect_set_flags, expand);
3380 #undef expand
3381 #define expand(l) \
3382  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3383  fill_table(__kmp_indirect_get_location, expand);
3384 #undef expand
3385 #define expand(l) \
3386  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3387  fill_table(__kmp_indirect_get_flags, expand);
3388 #undef expand
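// Illustrative note: each fill_table use above expands into plain assignments
// into the per-tag jump tables. For example, the first expansion fills the
// "set location" table as:
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
//   __kmp_indirect_set_location[locktag_queuing] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_queuing_lock_location;
//   __kmp_indirect_set_location[locktag_drdpa] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_drdpa_lock_location;
//   // ...plus the same three assignments for the locktag_nested_* tags, and
//   // locktag_adaptive mapped to the queuing accessors when
//   // KMP_USE_ADAPTIVE_LOCKS is enabled.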
3389 
3390  __kmp_init_user_locks = TRUE;
3391 }
3392 
3393 // Clean up the lock table.
3394 void __kmp_cleanup_indirect_user_locks() {
3395  kmp_lock_index_t i;
3396  int k;
3397 
3398  // Clean up locks in the pools first (they were already destroyed before going
3399  // into the pools).
3400  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3401  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3402  while (l != NULL) {
3403  kmp_indirect_lock_t *ll = l;
3404  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3405  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3406  ll));
3407  __kmp_free(ll->lock);
3408  ll->lock = NULL;
3409  }
3410  __kmp_indirect_lock_pool[k] = NULL;
3411  }
3412  // Clean up the remaining undestroyed locks.
3413  for (i = 0; i < __kmp_i_lock_table.next; i++) {
3414  kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3415  if (l->lock != NULL) {
3416  // Locks not destroyed explicitly need to be destroyed here.
3417  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3418  KA_TRACE(
3419  20,
3420  ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
3421  l));
3422  __kmp_free(l->lock);
3423  }
3424  }
3425  // Free the table
3426  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3427  __kmp_free(__kmp_i_lock_table.table[i]);
3428  __kmp_free(__kmp_i_lock_table.table);
3429 
3430  __kmp_init_user_locks = FALSE;
3431 }
3432 
3433 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3434 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3435 
3436 #else // KMP_USE_DYNAMIC_LOCK
3437 
3438 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3439  __kmp_init_tas_lock(lck);
3440 }
3441 
3442 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3443  __kmp_init_nested_tas_lock(lck);
3444 }
3445 
3446 #if KMP_USE_FUTEX
3447 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3448  __kmp_init_futex_lock(lck);
3449 }
3450 
3451 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3452  __kmp_init_nested_futex_lock(lck);
3453 }
3454 #endif
3455 
3456 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3457  return lck == lck->lk.self;
3458 }
3459 
3460 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3461  __kmp_init_ticket_lock(lck);
3462 }
3463 
3464 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3465  __kmp_init_nested_ticket_lock(lck);
3466 }
3467 
3468 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3469  return lck == lck->lk.initialized;
3470 }
3471 
3472 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3473  __kmp_init_queuing_lock(lck);
3474 }
3475 
3476 static void
3477 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3478  __kmp_init_nested_queuing_lock(lck);
3479 }
3480 
3481 #if KMP_USE_ADAPTIVE_LOCKS
3482 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3483  __kmp_init_adaptive_lock(lck);
3484 }
3485 #endif
3486 
3487 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3488  return lck == lck->lk.initialized;
3489 }
3490 
3491 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3492  __kmp_init_drdpa_lock(lck);
3493 }
3494 
3495 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3496  __kmp_init_nested_drdpa_lock(lck);
3497 }
3498 
3499 /* user locks
3500  * They are implemented as a table of function pointers which are set to the
3501  * lock functions of the appropriate kind, once that has been determined. */
3502 
3503 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3504 
3505 size_t __kmp_base_user_lock_size = 0;
3506 size_t __kmp_user_lock_size = 0;
3507 
3508 kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3509 int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3510  kmp_int32 gtid) = NULL;
3511 
3512 int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3513  kmp_int32 gtid) = NULL;
3514 int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3515  kmp_int32 gtid) = NULL;
3516 void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3517 void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3518 void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3519 int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3520  kmp_int32 gtid) = NULL;
3521 
3522 int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3523  kmp_int32 gtid) = NULL;
3524 int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3525  kmp_int32 gtid) = NULL;
3526 void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3527 void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3528 
3529 int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3530 const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3531 void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3532  const ident_t *loc) = NULL;
3533 kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3534 void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3535  kmp_lock_flags_t flags) = NULL;
3536 
3537 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3538  switch (user_lock_kind) {
3539  case lk_default:
3540  default:
3541  KMP_ASSERT(0);
3542 
3543  case lk_tas: {
3544  __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3545  __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3546 
3547  __kmp_get_user_lock_owner_ =
3548  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3549 
3550  if (__kmp_env_consistency_check) {
3551  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3552  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3553  } else {
3554  KMP_BIND_USER_LOCK(tas);
3555  KMP_BIND_NESTED_USER_LOCK(tas);
3556  }
3557 
3558  __kmp_destroy_user_lock_ =
3559  (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3560 
3561  __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3562 
3563  __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3564 
3565  __kmp_set_user_lock_location_ =
3566  (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3567 
3568  __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3569 
3570  __kmp_set_user_lock_flags_ =
3571  (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3572  } break;
3573 
3574 #if KMP_USE_FUTEX
3575 
3576  case lk_futex: {
3577  __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3578  __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3579 
3580  __kmp_get_user_lock_owner_ =
3581  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3582 
3583  if (__kmp_env_consistency_check) {
3584  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3585  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3586  } else {
3587  KMP_BIND_USER_LOCK(futex);
3588  KMP_BIND_NESTED_USER_LOCK(futex);
3589  }
3590 
3591  __kmp_destroy_user_lock_ =
3592  (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3593 
3594  __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3595 
3596  __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3597 
3598  __kmp_set_user_lock_location_ =
3599  (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3600 
3601  __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3602 
3603  __kmp_set_user_lock_flags_ =
3604  (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3605  } break;
3606 
3607 #endif // KMP_USE_FUTEX
3608 
3609  case lk_ticket: {
3610  __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3611  __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3612 
3613  __kmp_get_user_lock_owner_ =
3614  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3615 
3616  if (__kmp_env_consistency_check) {
3617  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3618  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3619  } else {
3620  KMP_BIND_USER_LOCK(ticket);
3621  KMP_BIND_NESTED_USER_LOCK(ticket);
3622  }
3623 
3624  __kmp_destroy_user_lock_ =
3625  (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3626 
3627  __kmp_is_user_lock_initialized_ =
3628  (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3629 
3630  __kmp_get_user_lock_location_ =
3631  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3632 
3633  __kmp_set_user_lock_location_ = (void (*)(
3634  kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3635 
3636  __kmp_get_user_lock_flags_ =
3637  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3638 
3639  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3640  &__kmp_set_ticket_lock_flags);
3641  } break;
3642 
3643  case lk_queuing: {
3644  __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3645  __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3646 
3647  __kmp_get_user_lock_owner_ =
3648  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3649 
3650  if (__kmp_env_consistency_check) {
3651  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3652  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3653  } else {
3654  KMP_BIND_USER_LOCK(queuing);
3655  KMP_BIND_NESTED_USER_LOCK(queuing);
3656  }
3657 
3658  __kmp_destroy_user_lock_ =
3659  (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3660 
3661  __kmp_is_user_lock_initialized_ =
3662  (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3663 
3664  __kmp_get_user_lock_location_ =
3665  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3666 
3667  __kmp_set_user_lock_location_ = (void (*)(
3668  kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3669 
3670  __kmp_get_user_lock_flags_ =
3671  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3672 
3673  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3674  &__kmp_set_queuing_lock_flags);
3675  } break;
3676 
3677 #if KMP_USE_ADAPTIVE_LOCKS
3678  case lk_adaptive: {
3679  __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3680  __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3681 
3682  __kmp_get_user_lock_owner_ =
3683  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3684 
3685  if (__kmp_env_consistency_check) {
3686  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3687  } else {
3688  KMP_BIND_USER_LOCK(adaptive);
3689  }
3690 
3691  __kmp_destroy_user_lock_ =
3692  (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3693 
3694  __kmp_is_user_lock_initialized_ =
3695  (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3696 
3697  __kmp_get_user_lock_location_ =
3698  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3699 
3700  __kmp_set_user_lock_location_ = (void (*)(
3701  kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3702 
3703  __kmp_get_user_lock_flags_ =
3704  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3705 
3706  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3707  &__kmp_set_queuing_lock_flags);
3708 
3709  } break;
3710 #endif // KMP_USE_ADAPTIVE_LOCKS
3711 
3712  case lk_drdpa: {
3713  __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3714  __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3715 
3716  __kmp_get_user_lock_owner_ =
3717  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3718 
3719  if (__kmp_env_consistency_check) {
3720  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3721  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3722  } else {
3723  KMP_BIND_USER_LOCK(drdpa);
3724  KMP_BIND_NESTED_USER_LOCK(drdpa);
3725  }
3726 
3727  __kmp_destroy_user_lock_ =
3728  (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3729 
3730  __kmp_is_user_lock_initialized_ =
3731  (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3732 
3733  __kmp_get_user_lock_location_ =
3734  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3735 
3736  __kmp_set_user_lock_location_ = (void (*)(
3737  kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3738 
3739  __kmp_get_user_lock_flags_ =
3740  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3741 
3742  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3743  &__kmp_set_drdpa_lock_flags);
3744  } break;
3745  }
3746 }
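// Illustrative sketch: once the vptrs are bound, the generic entry points
// dispatch through these globals instead of switching on the lock kind at
// every call. A caller is expected to use them roughly like this:
//
//   __kmp_set_user_lock_vptrs(lk_ticket); // chosen once, e.g. from KMP_LOCK_KIND
//   ...
//   if (__kmp_acquire_user_lock_with_checks_ != NULL)
//     (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); // ticket acquire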
3747 
3748 // ----------------------------------------------------------------------------
3749 // User lock table & lock allocation
3750 
3751 kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3752 kmp_user_lock_p __kmp_lock_pool = NULL;
3753 
3754 // Lock block-allocation support.
3755 kmp_block_of_locks *__kmp_lock_blocks = NULL;
3756 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3757 
3758 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3759  // Assume that kmp_global_lock is held upon entry/exit.
3760  kmp_lock_index_t index;
3761  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3762  kmp_lock_index_t size;
3763  kmp_user_lock_p *table;
3764  // Reallocate lock table.
3765  if (__kmp_user_lock_table.allocated == 0) {
3766  size = 1024;
3767  } else {
3768  size = __kmp_user_lock_table.allocated * 2;
3769  }
3770  table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3771  KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3772  sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3773  table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3774  // We cannot free the previous table now, since it may be in use by other
3775  // threads. So save the pointer to the previous table in the first
3776  // element of the new table. All the tables are organized into a list,
3777  // and can be freed when the library shuts down.
3778  __kmp_user_lock_table.table = table;
3779  __kmp_user_lock_table.allocated = size;
3780  }
3781  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3782  __kmp_user_lock_table.allocated);
3783  index = __kmp_user_lock_table.used;
3784  __kmp_user_lock_table.table[index] = lck;
3785  ++__kmp_user_lock_table.used;
3786  return index;
3787 }
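// Illustrative note: the table grows by doubling (1024, 2048, ...), and entry
// [0] of every new table stores the previous, smaller table, forming a chain
// that __kmp_cleanup_user_locks walks and frees at shutdown, roughly:
//
//   table[0] -> old table[0] -> older table[0] -> ... -> NULL
//
// Entry [0] is therefore never used for a lock, which is why indices handed
// out to user code start at 1 and why the cleanup loop stops at used > 1.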
3788 
3789 static kmp_user_lock_p __kmp_lock_block_allocate() {
3790  // Assume that kmp_global_lock is held upon entry/exit.
3791  static int last_index = 0;
3792  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3793  // Restart the index.
3794  last_index = 0;
3795  // Need to allocate a new block.
3796  KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3797  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3798  char *buffer =
3799  (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3800  // Set up the new block.
3801  kmp_block_of_locks *new_block =
3802  (kmp_block_of_locks *)(&buffer[space_for_locks]);
3803  new_block->next_block = __kmp_lock_blocks;
3804  new_block->locks = (void *)buffer;
3805  // Publish the new block.
3806  KMP_MB();
3807  __kmp_lock_blocks = new_block;
3808  }
3809  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3810  ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3811  last_index++;
3812  return ret;
3813 }
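// Illustrative sketch of the block layout: each block is a single allocation
// holding __kmp_num_locks_in_block lock-sized slots followed by the
// kmp_block_of_locks header, so the header describes the memory it sits in:
//
//   buffer: [lock 0][lock 1]...[lock N-1][kmp_block_of_locks{next_block,locks}]
//            ^ new_block->locks           ^ new_block (buffer + space_for_locks)
//
// __kmp_cleanup_user_locks frees block_ptr->locks (the buffer), which also
// releases the header; individual locks from a block are never freed one by
// one.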
3814 
3815 // Get memory for a lock. It may be freshly allocated memory or reused memory
3816 // from the lock pool.
3817 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3818  kmp_lock_flags_t flags) {
3819  kmp_user_lock_p lck;
3820  kmp_lock_index_t index;
3821  KMP_DEBUG_ASSERT(user_lock);
3822 
3823  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3824 
3825  if (__kmp_lock_pool == NULL) {
3826  // Lock pool is empty. Allocate new memory.
3827 
3828  if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3829  lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3830  } else {
3831  lck = __kmp_lock_block_allocate();
3832  }
3833 
3834  // Insert the lock into the table so that it can be freed in __kmp_cleanup,
3835  // and the debugger has info on all allocated locks.
3836  index = __kmp_lock_table_insert(lck);
3837  } else {
3838  // Pick up lock from pool.
3839  lck = __kmp_lock_pool;
3840  index = __kmp_lock_pool->pool.index;
3841  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3842  }
3843 
3844  // We could potentially differentiate between nested and regular locks
3845  // here, and do the lock table lookup for regular locks only.
3846  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3847  *((kmp_lock_index_t *)user_lock) = index;
3848  } else {
3849  *((kmp_user_lock_p *)user_lock) = lck;
3850  }
3851 
3852  // Mark the lock if it is a critical section lock.
3853  __kmp_set_user_lock_flags(lck, flags);
3854 
3855  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper
3856 
3857  return lck;
3858 }
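// Illustrative sketch: as with the indirect locks above, the user's lock word
// holds either a small table index (when omp_lock_t is narrower than a
// pointer) or the kmp_user_lock_p itself. Here the index is stored unshifted,
// and __kmp_lookup_user_lock below reverses the mapping, roughly:
//
//   kmp_lock_index_t index = *(kmp_lock_index_t *)user_lock;
//   kmp_user_lock_p lck = __kmp_user_lock_table.table[index];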
3859 
3860 // Put the lock's memory into the pool for reuse.
3861 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3862  kmp_user_lock_p lck) {
3863  KMP_DEBUG_ASSERT(user_lock != NULL);
3864  KMP_DEBUG_ASSERT(lck != NULL);
3865 
3866  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3867 
3868  lck->pool.next = __kmp_lock_pool;
3869  __kmp_lock_pool = lck;
3870  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3871  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3872  KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3873  lck->pool.index = index;
3874  }
3875 
3876  __kmp_release_lock(&__kmp_global_lock, gtid);
3877 }
3878 
3879 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3880  kmp_user_lock_p lck = NULL;
3881 
3882  if (__kmp_env_consistency_check) {
3883  if (user_lock == NULL) {
3884  KMP_FATAL(LockIsUninitialized, func);
3885  }
3886  }
3887 
3888  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3889  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3890  if (__kmp_env_consistency_check) {
3891  if (!(0 < index && index < __kmp_user_lock_table.used)) {
3892  KMP_FATAL(LockIsUninitialized, func);
3893  }
3894  }
3895  KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3896  KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3897  lck = __kmp_user_lock_table.table[index];
3898  } else {
3899  lck = *((kmp_user_lock_p *)user_lock);
3900  }
3901 
3902  if (__kmp_env_consistency_check) {
3903  if (lck == NULL) {
3904  KMP_FATAL(LockIsUninitialized, func);
3905  }
3906  }
3907 
3908  return lck;
3909 }
3910 
3911 void __kmp_cleanup_user_locks(void) {
3912  // Reset lock pool. Don't worry about locks in the pool--we will free them when
3913  // iterating through the lock table (it includes all the locks, dead or alive).
3914  __kmp_lock_pool = NULL;
3915 
3916 #define IS_CRITICAL(lck) \
3917  ((__kmp_get_user_lock_flags_ != NULL) && \
3918  ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3919 
3920  // Loop through the lock table and free all locks.
3921  // Do not free item [0]; it is reserved for the lock tables list.
3922  //
3923  // FIXME - we are iterating through a list of (pointers to) objects of type
3924  // union kmp_user_lock, but we have no way of knowing whether the base type is
3925  // currently "pool" or whatever the global user lock type is.
3926  //
3927  // We are relying on the fact that for all of the user lock types
3928  // (except "tas"), the first field in the lock struct is the "initialized"
3929  // field, which is set to the address of the lock object itself when
3930  // the lock is initialized. When the union is of type "pool", the
3931  // first field is a pointer to the next object in the free list, which
3932  // will not be the same address as the object itself.
3933  //
3934  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3935  // for "pool" objects on the free list. This must happen as the "location"
3936  // field of real user locks overlaps the "index" field of "pool" objects.
3937  //
3938  // It would be better to run through the free list, and remove all "pool"
3939  // objects from the lock table before executing this loop. However,
3940  // "pool" objects do not always have their index field set (only on
3941  // lin_32e), and I don't want to search the lock table for the address
3942  // of every "pool" object on the free list.
3943  while (__kmp_user_lock_table.used > 1) {
3944  const ident *loc;
3945 
3946  // Reduce __kmp_user_lock_table.used before freeing the lock,
3947  // so that the state of the locks stays consistent.
3948  kmp_user_lock_p lck =
3949  __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3950 
3951  if ((__kmp_is_user_lock_initialized_ != NULL) &&
3952  (*__kmp_is_user_lock_initialized_)(lck)) {
3953  // Issue a warning if: KMP_CONSISTENCY_CHECK AND the lock is initialized AND
3954  // it is NOT a critical section (the user is not responsible for destroying
3955  // criticals) AND we know the source location to report.
3956  if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3957  ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3958  (loc->psource != NULL)) {
3959  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
3960  KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3961  __kmp_str_loc_free(&str_loc);
3962  }
3963 
3964 #ifdef KMP_DEBUG
3965  if (IS_CRITICAL(lck)) {
3966  KA_TRACE(
3967  20,
3968  ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
3969  lck, *(void **)lck));
3970  } else {
3971  KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
3972  *(void **)lck));
3973  }
3974 #endif // KMP_DEBUG
3975 
3976  // Cleanup internal lock dynamic resources (for drdpa locks particularly).
3977  __kmp_destroy_user_lock(lck);
3978  }
3979 
3980  // Free the lock if block allocation of locks is not used.
3981  if (__kmp_lock_blocks == NULL) {
3982  __kmp_free(lck);
3983  }
3984  }
3985 
3986 #undef IS_CRITICAL
3987 
3988  // delete lock table(s).
3989  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3990  __kmp_user_lock_table.table = NULL;
3991  __kmp_user_lock_table.allocated = 0;
3992 
3993  while (table_ptr != NULL) {
3994  // In the first element we saved the pointer to the previous
3995  // (smaller) lock table.
3996  kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
3997  __kmp_free(table_ptr);
3998  table_ptr = next;
3999  }
4000 
4001  // Free buffers allocated for blocks of locks.
4002  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4003  __kmp_lock_blocks = NULL;
4004 
4005  while (block_ptr != NULL) {
4006  kmp_block_of_locks_t *next = block_ptr->next_block;
4007  __kmp_free(block_ptr->locks);
4008  // *block_ptr itself was allocated at the end of the locks vector.
4009  block_ptr = next;
4010  }
4011 
4012  TCW_4(__kmp_init_user_locks, FALSE);
4013 }
4014 
4015 #endif // KMP_USE_DYNAMIC_LOCK