LLVM OpenMP* Runtime Library
kmp_atomic.cpp
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for float complex data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601  Without the "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
602  bug on *_32 and *_32e. This is just a temporary workaround for the problem;
603  the right solution seems to be writing the OP_CMPXCHG and MIN_MAX_CMPXCHG
604  routines in assembly language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610  return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613  return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616  return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619  return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622  return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625  return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629  return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632  return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635  return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638  return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641  return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644  return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648  kmp_cmplx128_a4_t &rhs) {
649  return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652  kmp_cmplx128_a4_t &rhs) {
653  return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656  kmp_cmplx128_a4_t &rhs) {
657  return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660  kmp_cmplx128_a4_t &rhs) {
661  return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665  kmp_cmplx128_a16_t &rhs) {
666  return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669  kmp_cmplx128_a16_t &rhs) {
670  return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673  kmp_cmplx128_a16_t &rhs) {
674  return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677  kmp_cmplx128_a16_t &rhs) {
678  return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
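// For example, for a C statement like
//   #pragma omp atomic
//   x += y;                           // float x; float y;
// a compiler targeting this runtime may emit a call such as the following
// (an illustrative sketch only; the exact lowering and the way the location
// and gtid arguments are obtained are compiler-dependent):
//   __kmpc_atomic_float4_add(&loc, __kmpc_global_thread_num(&loc), &x, y);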
687 
688 #define KMP_CHECK_GTID \
689  if (gtid == KMP_GTID_UNKNOWN) { \
690  gtid = __kmp_entry_gtid(); \
691  } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
695 // fixed-size integers)
696 // OP_ID - operation identifier (add, sub, mul, ...)
697 // TYPE - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700  TYPE *lhs, TYPE rhs) { \
701  KMP_DEBUG_ASSERT(__kmp_init_serial); \
702  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703 
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 // OP - operator (it's supposed to contain an assignment)
723 // LCK_ID - lock identifier
724 // Note: don't check gtid as it should always be valid
725 // 1- and 2-byte operands: gtid is expected to be valid; for other sizes,
725 // check gtid before using this macro
726 #define OP_CRITICAL(OP, LCK_ID) \
727  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728  \
729  (*lhs) OP(rhs); \
730  \
731  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \
736  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
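// Illustrative expansion sketch (not part of the library source):
// OP_UPDATE_CRITICAL(float, +, 4r) becomes, roughly,
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//   (*lhs) = (float)((*lhs) + ((float)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);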
737 
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section. On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange. Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1. If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG) \
762  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763  KMP_CHECK_GTID; \
764  OP_CRITICAL(OP, 0); \
765  return; \
766  }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770  KMP_CHECK_GTID; \
771  OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772  return; \
773  }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
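// Illustrative sketch: with KMP_GOMP_COMPAT defined,
// OP_UPDATE_GOMP_CRITICAL(float, +, 1) expands roughly to
//   if ((1) && (__kmp_atomic_mode == 2)) {
//     KMP_CHECK_GTID;
//     OP_UPDATE_CRITICAL(float, +, 0); // i.e. the generic __kmp_atomic_lock
//     return;
//   }
// so GOMP-compiled callers and user-coded atomics serialize on the same lock.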
778 
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 // TYPE - operands' type
788 // BITS - size in bits, used to distinguish low level calls
789 // OP - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP) \
791  { \
792  TYPE old_value, new_value; \
793  old_value = *(TYPE volatile *)lhs; \
794  new_value = (TYPE)(old_value OP((TYPE)rhs)); \
795  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798  KMP_DO_PAUSE; \
799  \
800  old_value = *(TYPE volatile *)lhs; \
801  new_value = (TYPE)(old_value OP((TYPE)rhs)); \
802  } \
803  }
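// Illustrative expansion sketch: OP_CMPXCHG(kmp_real32, 32, +) becomes,
// roughly, a compare-and-store retry loop on the 32-bit image of *lhs:
//   kmp_real32 old_value, new_value;
//   old_value = *(kmp_real32 volatile *)lhs;
//   new_value = (kmp_real32)(old_value + ((kmp_real32)rhs));
//   while (!KMP_COMPARE_AND_STORE_ACQ32(
//       (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//       *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//     KMP_DO_PAUSE;
//     old_value = *(kmp_real32 volatile *)lhs; // another thread won; retry
//     new_value = (kmp_real32)(old_value + ((kmp_real32)rhs));
//   }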
804 
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). The compiler ignores the
809 // volatile qualifier of temp_val in the OP_CMPXCHG macro; this is a compiler
810 // bug. The related tracker is C76005, targeted to 11.0. I verified the asm of
811 // the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813  { \
814  struct _sss { \
815  TYPE cmp; \
816  kmp_int##BITS *vvv; \
817  }; \
818  struct _sss old_value, new_value; \
819  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826  KMP_DO_PAUSE; \
827  \
828  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829  new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830  } \
831  }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
835 #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
836 // Undo explicit type casts to get MSVC ARM64 to build. Uses
837 // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
838 #undef OP_CMPXCHG
839 #define OP_CMPXCHG(TYPE, BITS, OP) \
840  { \
841  struct _sss { \
842  TYPE cmp; \
843  kmp_int##BITS *vvv; \
844  }; \
845  struct _sss old_value, new_value; \
846  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
847  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
848  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
849  new_value.cmp = old_value.cmp OP rhs; \
850  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
851  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
852  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
853  KMP_DO_PAUSE; \
854  \
855  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
856  new_value.cmp = old_value.cmp OP rhs; \
857  } \
858  }
859 
860 #undef OP_UPDATE_CRITICAL
861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
862  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
863  (*lhs) = (*lhs)OP rhs; \
864  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865 
866 #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64
867 
868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
869 
870 // ------------------------------------------------------------------------
871 // X86 or X86_64: no alignment problems ====================================
872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
873  GOMP_FLAG) \
874  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
875  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
876  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
877  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
878  }
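// Illustrative sketch: on this path ATOMIC_FIXED_ADD(fixed4, add, kmp_int32,
// 32, +, 4i, 3, 0) expands roughly to
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     // GOMP_FLAG == 0, so the GOMP critical path is dead code
//     KMP_TEST_THEN_ADD32(lhs, +rhs);
//   }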
879 // -------------------------------------------------------------------------
880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
881  GOMP_FLAG) \
882  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
883  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
884  OP_CMPXCHG(TYPE, BITS, OP) \
885  }
886 #if USE_CMPXCHG_FIX
887 // -------------------------------------------------------------------------
888 // workaround for C78287 (complex(kind=4) data type)
889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
890  MASK, GOMP_FLAG) \
891  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
892  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
893  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
894  }
895 // end of the second part of the workaround for C78287
896 #endif // USE_CMPXCHG_FIX
897 
898 #else
899 // -------------------------------------------------------------------------
900 // Code for other architectures that don't handle unaligned accesses.
901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
902  GOMP_FLAG) \
903  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
904  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
905  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
906  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
907  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
908  } else { \
909  KMP_CHECK_GTID; \
910  OP_UPDATE_CRITICAL(TYPE, OP, \
911  LCK_ID) /* unaligned address - use critical */ \
912  } \
913  }
914 // -------------------------------------------------------------------------
915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
916  GOMP_FLAG) \
917  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
918  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
919  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
920  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
921  } else { \
922  KMP_CHECK_GTID; \
923  OP_UPDATE_CRITICAL(TYPE, OP, \
924  LCK_ID) /* unaligned address - use critical */ \
925  } \
926  }
927 #if USE_CMPXCHG_FIX
928 // -------------------------------------------------------------------------
929 // workaround for C78287 (complex(kind=4) data type)
930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
931  MASK, GOMP_FLAG) \
932  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
933  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
934  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
935  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
936  } else { \
937  KMP_CHECK_GTID; \
938  OP_UPDATE_CRITICAL(TYPE, OP, \
939  LCK_ID) /* unaligned address - use critical */ \
940  } \
941  }
942 // end of the second part of the workaround for C78287
943 #endif // USE_CMPXCHG_FIX
944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945 
946 // Routines for ATOMIC 4-byte operands addition and subtraction
947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948  0) // __kmpc_atomic_fixed4_add
949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950  0) // __kmpc_atomic_fixed4_sub
951 
952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953  KMP_ARCH_X86) // __kmpc_atomic_float4_add
954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956 
957 // Routines for ATOMIC 8-byte operands addition and subtraction
958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962 
963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964  KMP_ARCH_X86) // __kmpc_atomic_float8_add
965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967 
968 // ------------------------------------------------------------------------
969 // Entries definition for integer operands
970 // TYPE_ID - operands type and size (fixed4, float4)
971 // OP_ID - operation identifier (add, sub, mul, ...)
972 // TYPE - operand type
973 // BITS - size in bits, used to distinguish low level calls
974 // OP - operator (used in critical section)
975 // LCK_ID - lock identifier, used to possibly distinguish lock variable
976 // MASK - used for alignment check
977 
978 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
979 // ------------------------------------------------------------------------
980 // Routines for ATOMIC integer operands, other operators
981 // ------------------------------------------------------------------------
982 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986  0) // __kmpc_atomic_fixed1_andb
987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994  0) // __kmpc_atomic_fixed1_orb
995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004  0) // __kmpc_atomic_fixed1_xor
1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008  0) // __kmpc_atomic_fixed2_andb
1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016  0) // __kmpc_atomic_fixed2_orb
1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026  0) // __kmpc_atomic_fixed2_xor
1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028  0) // __kmpc_atomic_fixed4_andb
1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036  0) // __kmpc_atomic_fixed4_orb
1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044  0) // __kmpc_atomic_fixed4_xor
1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1072 
1073 /* ------------------------------------------------------------------------ */
1074 /* Routines for C/C++ Reduction operators && and || */
1075 
1076 // ------------------------------------------------------------------------
1077 // Need separate macros for &&, || because there is no combined assignment
1078 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1080  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1081  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1082  OP_CRITICAL(= *lhs OP, LCK_ID) \
1083  }
1084 
1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086 
1087 // ------------------------------------------------------------------------
1088 // X86 or X86_64: no alignment problems ===================================
1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1091  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1092  OP_CMPXCHG(TYPE, BITS, OP) \
1093  }
1094 
1095 #else
1096 // ------------------------------------------------------------------------
1097 // Code for other architectures that don't handle unaligned accesses.
1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1100  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1101  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1102  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1103  } else { \
1104  KMP_CHECK_GTID; \
1105  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1106  } \
1107  }
1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109 
1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119  0) // __kmpc_atomic_fixed4_andl
1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121  0) // __kmpc_atomic_fixed4_orl
1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126 
1127 /* ------------------------------------------------------------------------- */
1128 /* Routines for Fortran operators that have no C counterpart: */
1129 /* MAX, MIN, .EQV., .NEQV. */
1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1132 
1133 // -------------------------------------------------------------------------
1134 // MIN and MAX need separate macros
1135 // OP - operator used to check whether any action is needed
1136 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1137  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1138  \
1139  if (*lhs OP rhs) { /* still need actions? */ \
1140  *lhs = rhs; \
1141  } \
1142  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143 
1144 // -------------------------------------------------------------------------
1145 #ifdef KMP_GOMP_COMPAT
1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1147  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1148  KMP_CHECK_GTID; \
1149  MIN_MAX_CRITSECT(OP, 0); \
1150  return; \
1151  }
1152 #else
1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154 #endif /* KMP_GOMP_COMPAT */
1155 
1156 // -------------------------------------------------------------------------
1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1158  { \
1159  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1160  TYPE old_value; \
1161  temp_val = *lhs; \
1162  old_value = temp_val; \
1163  while (old_value OP rhs && /* still need actions? */ \
1164  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1165  (kmp_int##BITS *)lhs, \
1166  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1167  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1168  temp_val = *lhs; \
1169  old_value = temp_val; \
1170  } \
1171  }
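// Illustrative expansion sketch: for __kmpc_atomic_fixed4_max below, OP is <
// and MIN_MAX_CMPXCHG(kmp_int32, 32, <) retries the compare-and-store only
// while the stored value still needs replacing:
//   kmp_int32 KMP_ATOMIC_VOLATILE temp_val;
//   kmp_int32 old_value;
//   temp_val = *lhs;
//   old_value = temp_val;
//   while (old_value < rhs && /* still need actions? */
//          !KMP_COMPARE_AND_STORE_ACQ32(
//              (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//              *VOLATILE_CAST(kmp_int32 *) & rhs)) {
//     temp_val = *lhs;
//     old_value = temp_val;
//   }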
1172 
1173 // -------------------------------------------------------------------------
1174 // 1-byte, 2-byte operands - use critical section
1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1176  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1177  if (*lhs OP rhs) { /* need actions? */ \
1178  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1179  MIN_MAX_CRITSECT(OP, LCK_ID) \
1180  } \
1181  }
1182 
1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184 
1185 // -------------------------------------------------------------------------
1186 // X86 or X86_64: no alignment problems ====================================
1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1188  GOMP_FLAG) \
1189  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1190  if (*lhs OP rhs) { \
1191  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1192  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1193  } \
1194  }
1195 
1196 #else
1197 // -------------------------------------------------------------------------
1198 // Code for other architectures that don't handle unaligned accesses.
1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1200  GOMP_FLAG) \
1201  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1202  if (*lhs OP rhs) { \
1203  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1204  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1205  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1206  } else { \
1207  KMP_CHECK_GTID; \
1208  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1209  } \
1210  } \
1211  }
1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213 
1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223  0) // __kmpc_atomic_fixed4_max
1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225  0) // __kmpc_atomic_fixed4_min
1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238 #if KMP_HAVE_QUAD
1239 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1240  1) // __kmpc_atomic_float16_max
1241 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1242  1) // __kmpc_atomic_float16_min
1243 #if (KMP_ARCH_X86)
1244 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1245  1) // __kmpc_atomic_float16_max_a16
1246 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1247  1) // __kmpc_atomic_float16_min_a16
1248 #endif // (KMP_ARCH_X86)
1249 #endif // KMP_HAVE_QUAD
1250 // ------------------------------------------------------------------------
1251 // Need separate macros for .EQV. because of the need for a complement (~)
1252 // OP ignored for critical sections, ^=~ used instead
1253 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1254  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1255  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1256  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1257  }
1258 
1259 // ------------------------------------------------------------------------
1260 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1261 // ------------------------------------------------------------------------
1262 // X86 or X86_64: no alignment problems ===================================
1263 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1264  GOMP_FLAG) \
1265  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1266  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1267  OP_CMPXCHG(TYPE, BITS, OP) \
1268  }
1269 // ------------------------------------------------------------------------
1270 #else
1271 // ------------------------------------------------------------------------
1272 // Code for other architectures that don't handle unaligned accesses.
1273 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1274  GOMP_FLAG) \
1275  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1276  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1277  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1278  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1279  } else { \
1280  KMP_CHECK_GTID; \
1281  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1282  } \
1283  }
1284 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1285 
1286 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1287  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1288 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1289  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1290 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1291  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1292 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1293  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1294 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1295  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1296 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1297  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1298 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1299  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1300 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1301  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
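// Illustrative sketch: a Fortran statement  x = x .NEQV. s  (logical x, s)
// maps to the plain xor entry generated above, e.g.
//   __kmpc_atomic_fixed4_neqv(&loc, gtid, &x, s);   // *lhs ^= rhs
// while  x = x .EQV. s  uses the complemented form,
//   __kmpc_atomic_fixed4_eqv(&loc, gtid, &x, s);    // *lhs ^= ~rhs
// (callsite arguments shown are hypothetical; the Fortran front end supplies
// the actual location and gtid).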
1302 
1303 // ------------------------------------------------------------------------
1304 // Routines for Extended types: long double, _Quad, complex flavours (use
1305 // critical section)
1306 // TYPE_ID, OP_ID, TYPE - detailed above
1307 // OP - operator
1308 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1309 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1310  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1311  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1312  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1313  }
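// Illustrative sketch: ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1)
// expands roughly to
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_float10_add: T#%d\n", gtid));
//     // GOMP-compat path first (FLAG == 1), then the 10r critical update:
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     (*lhs) = (long double)((*lhs) + ((long double)rhs));
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   }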
1314 
1315 /* ------------------------------------------------------------------------- */
1316 // routines for long double type
1317 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1318  1) // __kmpc_atomic_float10_add
1319 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1320  1) // __kmpc_atomic_float10_sub
1321 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1322  1) // __kmpc_atomic_float10_mul
1323 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1324  1) // __kmpc_atomic_float10_div
1325 #if KMP_HAVE_QUAD
1326 // routines for _Quad type
1327 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1328  1) // __kmpc_atomic_float16_add
1329 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1330  1) // __kmpc_atomic_float16_sub
1331 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1332  1) // __kmpc_atomic_float16_mul
1333 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1334  1) // __kmpc_atomic_float16_div
1335 #if (KMP_ARCH_X86)
1336 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1337  1) // __kmpc_atomic_float16_add_a16
1338 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1339  1) // __kmpc_atomic_float16_sub_a16
1340 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1341  1) // __kmpc_atomic_float16_mul_a16
1342 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1343  1) // __kmpc_atomic_float16_div_a16
1344 #endif // (KMP_ARCH_X86)
1345 #endif // KMP_HAVE_QUAD
1346 // routines for complex types
1347 
1348 #if USE_CMPXCHG_FIX
1349 // workaround for C78287 (complex(kind=4) data type)
1350 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1351  1) // __kmpc_atomic_cmplx4_add
1352 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1353  1) // __kmpc_atomic_cmplx4_sub
1354 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1355  1) // __kmpc_atomic_cmplx4_mul
1356 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1357  1) // __kmpc_atomic_cmplx4_div
1358 // end of the workaround for C78287
1359 #else
1360 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1361 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1362 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1363 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1364 #endif // USE_CMPXCHG_FIX
1365 
1366 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1367 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1368 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1369 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1370 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1371  1) // __kmpc_atomic_cmplx10_add
1372 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1373  1) // __kmpc_atomic_cmplx10_sub
1374 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1375  1) // __kmpc_atomic_cmplx10_mul
1376 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1377  1) // __kmpc_atomic_cmplx10_div
1378 #if KMP_HAVE_QUAD
1379 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1380  1) // __kmpc_atomic_cmplx16_add
1381 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1382  1) // __kmpc_atomic_cmplx16_sub
1383 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1384  1) // __kmpc_atomic_cmplx16_mul
1385 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1386  1) // __kmpc_atomic_cmplx16_div
1387 #if (KMP_ARCH_X86)
1388 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1389  1) // __kmpc_atomic_cmplx16_add_a16
1390 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1391  1) // __kmpc_atomic_cmplx16_sub_a16
1392 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1393  1) // __kmpc_atomic_cmplx16_mul_a16
1394 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1395  1) // __kmpc_atomic_cmplx16_div_a16
1396 #endif // (KMP_ARCH_X86)
1397 #endif // KMP_HAVE_QUAD
1398 
1399 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1400 // Supported only on IA-32 architecture and Intel(R) 64
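// Illustrative sketch: for the non-commutative form
//   #pragma omp atomic
//   x = expr - x;                      // double x;
// the compiler may call the corresponding "_rev" entry generated below, e.g.
//   __kmpc_atomic_float8_sub_rev(&loc, gtid, &x, expr);  // *lhs = rhs - *lhs
// (callsite arguments are hypothetical; the lowering is compiler-dependent).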
1401 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1402 
1403 // ------------------------------------------------------------------------
1404 // Operation on *lhs, rhs bound by critical section
1405 // OP - operator (it's supposed to contain an assignment)
1406 // LCK_ID - lock identifier
1407 // Note: don't check gtid as it should always be valid
1408 // 1- and 2-byte operands: gtid is expected to be valid; for other sizes,
1408 // check gtid before using this macro
1409 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1410  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1411  \
1412  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1413  \
1414  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1415 
1416 #ifdef KMP_GOMP_COMPAT
1417 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1418  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1419  KMP_CHECK_GTID; \
1420  OP_CRITICAL_REV(TYPE, OP, 0); \
1421  return; \
1422  }
1423 
1424 #else
1425 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1426 #endif /* KMP_GOMP_COMPAT */
1427 
1428 // Beginning of a definition (provides name, parameters, debug trace)
1429 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
1430 // fixed-size integers)
1431 // OP_ID - operation identifier (add, sub, mul, ...)
1432 // TYPE - operands' type
1433 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1434  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1435  TYPE *lhs, TYPE rhs) { \
1436  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1437  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1438 
1439 // ------------------------------------------------------------------------
1440 // Operation on *lhs, rhs using "compare_and_store" routine
1441 // TYPE - operands' type
1442 // BITS - size in bits, used to distinguish low level calls
1443 // OP - operator
1444 // Note: temp_val introduced in order to force the compiler to read
1445 // *lhs only once (w/o it the compiler reads *lhs twice)
1446 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1447  { \
1448  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1449  TYPE old_value, new_value; \
1450  temp_val = *lhs; \
1451  old_value = temp_val; \
1452  new_value = (TYPE)(rhs OP old_value); \
1453  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1454  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1455  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1456  KMP_DO_PAUSE; \
1457  \
1458  temp_val = *lhs; \
1459  old_value = temp_val; \
1460  new_value = (TYPE)(rhs OP old_value); \
1461  } \
1462  }
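// Illustrative note: OP_CMPXCHG_REV(kmp_real64, 64, /) follows the same retry
// loop as OP_CMPXCHG above, but recomputes the candidate on every iteration
// with the operands reversed:
//   new_value = (kmp_real64)(rhs / old_value);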
1463 
1464 // -------------------------------------------------------------------------
1465 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1466  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1467  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1468  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1469  }
1470 
1471 // ------------------------------------------------------------------------
1472 // Entries definition for integer operands
1473 // TYPE_ID - operands type and size (fixed4, float4)
1474 // OP_ID - operation identifier (add, sub, mul, ...)
1475 // TYPE - operand type
1476 // BITS - size in bits, used to distinguish low level calls
1477 // OP - operator (used in critical section)
1478 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1479 
1480 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1481 // ------------------------------------------------------------------------
1482 // Routines for ATOMIC integer operands, other operators
1483 // ------------------------------------------------------------------------
1484 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1485 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1486  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1487 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1488  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1489 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1490  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1491 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1492  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1493 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1494  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1495 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1496  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1497 
1498 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1499  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1500 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1501  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1502 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1503  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1504 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1505  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1506 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1507  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1508 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1509  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1510 
1511 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1512  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1513 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1514  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1515 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1516  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1517 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1518  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1519 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1520  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1521 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1522  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1523 
1524 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1525  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1526 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1527  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1528 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1529  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1530 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1531  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1532 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1533  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1534 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1535  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1536 
1537 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1538  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1539 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1540  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1541 
1542 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1543  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1544 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1545  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1546 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1547 
1548 // ------------------------------------------------------------------------
1549 // Routines for Extended types: long double, _Quad, complex flavours (use
1550 // critical section)
1551 // TYPE_ID, OP_ID, TYPE - detailed above
1552 // OP - operator
1553 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1554 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1555  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1556  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1557  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1558  }
1559 
1560 /* ------------------------------------------------------------------------- */
1561 // routines for long double type
1562 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1563  1) // __kmpc_atomic_float10_sub_rev
1564 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1565  1) // __kmpc_atomic_float10_div_rev
1566 #if KMP_HAVE_QUAD
1567 // routines for _Quad type
1568 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1569  1) // __kmpc_atomic_float16_sub_rev
1570 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1571  1) // __kmpc_atomic_float16_div_rev
1572 #if (KMP_ARCH_X86)
1573 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1574  1) // __kmpc_atomic_float16_sub_a16_rev
1575 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1576  1) // __kmpc_atomic_float16_div_a16_rev
1577 #endif // KMP_ARCH_X86
1578 #endif // KMP_HAVE_QUAD
1579 
1580 // routines for complex types
1581 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1582  1) // __kmpc_atomic_cmplx4_sub_rev
1583 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1584  1) // __kmpc_atomic_cmplx4_div_rev
1585 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1586  1) // __kmpc_atomic_cmplx8_sub_rev
1587 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1588  1) // __kmpc_atomic_cmplx8_div_rev
1589 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1590  1) // __kmpc_atomic_cmplx10_sub_rev
1591 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1592  1) // __kmpc_atomic_cmplx10_div_rev
1593 #if KMP_HAVE_QUAD
1594 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1595  1) // __kmpc_atomic_cmplx16_sub_rev
1596 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1597  1) // __kmpc_atomic_cmplx16_div_rev
1598 #if (KMP_ARCH_X86)
1599 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1600  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1601 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1602  1) // __kmpc_atomic_cmplx16_div_a16_rev
1603 #endif // KMP_ARCH_X86
1604 #endif // KMP_HAVE_QUAD
1605 
1606 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1607 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1608 
1609 /* ------------------------------------------------------------------------ */
1610 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1611 /* Note: to reduce the total number of type combinations, it is assumed */
1612 /* that the compiler converts RHS to the longest floating type, that is */
1613 /* _Quad, before calling any of these routines. */
1614 /* The conversion to _Quad is done by the compiler during the calculation, */
1615 /* and the conversion back to TYPE happens before the assignment, like: */
1616 /*   *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1617 /* A performance penalty is expected because of software emulation. */
1618 /* ------------------------------------------------------------------------ */
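// Illustrative sketch: for
//   float x; _Quad q;
//   #pragma omp atomic
//   x = x / q;
// the compiler is expected to call the mixed-type entry generated below, e.g.
//   __kmpc_atomic_float4_div_fp(&loc, gtid, &x, q);
// (callsite arguments are hypothetical; the lowering is compiler-dependent).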
1619 
1620 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1621  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1622  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1623  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1624  KA_TRACE(100, \
1625  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1626  gtid));
1627 
1628 // -------------------------------------------------------------------------
1629 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1630  GOMP_FLAG) \
1631  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1632  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1633  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1634  }
1635 
1636 // -------------------------------------------------------------------------
1637 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1638 // -------------------------------------------------------------------------
1639 // X86 or X86_64: no alignment problems ====================================
1640 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1641  LCK_ID, MASK, GOMP_FLAG) \
1642  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1643  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1644  OP_CMPXCHG(TYPE, BITS, OP) \
1645  }
1646 // -------------------------------------------------------------------------
1647 #else
1648 // ------------------------------------------------------------------------
1649 // Code for other architectures that don't handle unaligned accesses.
1650 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1651  LCK_ID, MASK, GOMP_FLAG) \
1652  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1653  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1654  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1655  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1656  } else { \
1657  KMP_CHECK_GTID; \
1658  OP_UPDATE_CRITICAL(TYPE, OP, \
1659  LCK_ID) /* unaligned address - use critical */ \
1660  } \
1661  }
1662 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1663 
1664 // -------------------------------------------------------------------------
1665 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1666 // -------------------------------------------------------------------------
1667 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1668  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1669  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1670  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1671  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1672  }
1673 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1674  LCK_ID, GOMP_FLAG) \
1675  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1676  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1677  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1678  }
1679 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1680 
1681 // RHS=float8
1682 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1683  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1684 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1685  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1686 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1687  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1688 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1689  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1690 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1691  0) // __kmpc_atomic_fixed4_mul_float8
1692 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1693  0) // __kmpc_atomic_fixed4_div_float8
1694 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1695  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1696 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1697  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1698 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1699  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1700 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1701  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1702 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1703  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1704 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1705  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1706 
1707 // RHS=float16 (deprecated; to be removed once we are sure the compiler does not
1708 // use these entries)
1709 #if KMP_HAVE_QUAD
1710 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1711  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1712 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1713  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1715  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1716 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1717  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1719  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1720 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1721  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1723  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1724 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1725  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1726 
1727 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1728  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1729 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1730  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1731 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1732  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1733 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1734  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1735 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1736  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1737 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1738  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1739 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1740  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1741 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1742  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1743 
1744 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1745  0) // __kmpc_atomic_fixed4_add_fp
1746 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1747  0) // __kmpc_atomic_fixed4u_add_fp
1748 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1749  0) // __kmpc_atomic_fixed4_sub_fp
1750 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1751  0) // __kmpc_atomic_fixed4u_sub_fp
1752 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1753  0) // __kmpc_atomic_fixed4_mul_fp
1754 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1755  0) // __kmpc_atomic_fixed4u_mul_fp
1756 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1757  0) // __kmpc_atomic_fixed4_div_fp
1758 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1759  0) // __kmpc_atomic_fixed4u_div_fp
1760 
1761 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1762  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1763 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1764  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1765 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1766  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1767 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1768  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1769 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1770  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1771 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1772  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1773 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1774  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1775 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1776  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1777 
1778 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1779  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1780 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1781  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1782 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1783  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1784 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1785  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1786 
1787 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1788  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1789 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1790  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1791 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1792  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1793 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1794  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1795 
1796 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1797  1) // __kmpc_atomic_float10_add_fp
1798 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1799  1) // __kmpc_atomic_float10_sub_fp
1800 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1801  1) // __kmpc_atomic_float10_mul_fp
1802 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1803  1) // __kmpc_atomic_float10_div_fp
1804 
1805 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1806 // Reverse operations
1807 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1808  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1809 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1810  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1811 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1812  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1813 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1814  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1815 
1816 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1817  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1818 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1819  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1820 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1821  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1822 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1823  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1824 
1825 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1826  0) // __kmpc_atomic_fixed4_sub_rev_fp
1827 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1828  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1829 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1830  0) // __kmpc_atomic_fixed4_div_rev_fp
1831 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1832  0) // __kmpc_atomic_fixed4u_div_rev_fp
1833 
1834 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1835  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1836 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1837  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1838 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1839  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1840 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1841  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1842 
1843 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1844  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1845 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1846  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1847 
1848 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1849  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1850 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1851  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1852 
1853 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1854  1) // __kmpc_atomic_float10_sub_rev_fp
1855 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1856  1) // __kmpc_atomic_float10_div_rev_fp
1857 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1858 
1859 #endif // KMP_HAVE_QUAD
1860 
1861 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1862 // ------------------------------------------------------------------------
1863 // X86 or X86_64: no alignment problems ====================================
1864 #if USE_CMPXCHG_FIX
1865 // workaround for C78287 (complex(kind=4) data type)
1866 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1867  LCK_ID, MASK, GOMP_FLAG) \
1868  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1869  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1870  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1871  }
1872 // end of the second part of the workaround for C78287
1873 #else
1874 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1875  LCK_ID, MASK, GOMP_FLAG) \
1876  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1877  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1878  OP_CMPXCHG(TYPE, BITS, OP) \
1879  }
1880 #endif // USE_CMPXCHG_FIX
1881 #else
1882 // ------------------------------------------------------------------------
1883 // Code for other architectures that don't handle unaligned accesses.
1884 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1885  LCK_ID, MASK, GOMP_FLAG) \
1886  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1887  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1888  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1889  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1890  } else { \
1891  KMP_CHECK_GTID; \
1892  OP_UPDATE_CRITICAL(TYPE, OP, \
1893  LCK_ID) /* unaligned address - use critical */ \
1894  } \
1895  }
1896 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1897 
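// Hedged note on the MASK argument used in the entries below: on targets
// that cannot do an unaligned compare-and-store, 0x##MASK tests the low bits
// of the address, e.g. MASK 7 checks 8-byte alignment via
// !((kmp_uintptr_t)lhs & 0x7). Aligned locations take the lock-free
// OP_CMPXCHG path; unaligned ones fall back to the critical section guarded
// by the LCK_ID (8c) lock.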
1898 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1899  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1900 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1901  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1902 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1903  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1904 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1905  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1906 
1907 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1908 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1909 
1910 // ------------------------------------------------------------------------
1911 // Atomic READ routines
1912 
1913 // ------------------------------------------------------------------------
1914 // Beginning of a definition (provides name, parameters, debug trace)
1915 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
1916 // unsigned fixed types)
1917 // OP_ID - operation identifier (add, sub, mul, ...)
1918 // TYPE - operands' type
1919 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1920  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1921  TYPE *loc) { \
1922  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1923  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1924 
1925 // ------------------------------------------------------------------------
1926 // Atomic read of *loc using "compare_and_store_ret" routine
1927 // TYPE - operands' type
1928 // BITS - size in bits, used to distinguish low level calls
1929 // OP - operator
1930 // Note: temp_val introduced in order to force the compiler to read
1931 // *loc only once (w/o it the compiler reads *loc twice)
1932 // TODO: check if it is still necessary
1933 // Return old value regardless of the result of the "compare & swap" operation
1934 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1935  { \
1936  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1937  union f_i_union { \
1938  TYPE f_val; \
1939  kmp_int##BITS i_val; \
1940  }; \
1941  union f_i_union old_value; \
1942  temp_val = *loc; \
1943  old_value.f_val = temp_val; \
1944  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1945  (kmp_int##BITS *)loc, \
1946  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1947  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1948  new_value = old_value.f_val; \
1949  return new_value; \
1950  }
1951 
1952 // -------------------------------------------------------------------------
1953 // Atomic read of *loc bound by critical section
1954 // OP - operator (it's supposed to contain an assignment)
1955 // LCK_ID - lock identifier
1956 // Note: don't check gtid as it should always be valid
1957 // For 1- and 2-byte operands a valid gtid is expected; other types check it before this macro
1958 #define OP_CRITICAL_READ(OP, LCK_ID) \
1959  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1960  \
1961  new_value = (*loc); \
1962  \
1963  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1964 
1965 // -------------------------------------------------------------------------
1966 #ifdef KMP_GOMP_COMPAT
1967 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1968  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1969  KMP_CHECK_GTID; \
1970  OP_CRITICAL_READ(OP, 0); \
1971  return new_value; \
1972  }
1973 #else
1974 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1975 #endif /* KMP_GOMP_COMPAT */
1976 
1977 // -------------------------------------------------------------------------
1978 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1979  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1980  TYPE new_value; \
1981  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1982  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1983  return new_value; \
1984  }
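// Note on ATOMIC_FIXED_READ above: KMP_TEST_THEN_ADD##BITS(loc, OP 0) with
// OP '+' performs an atomic fetch-and-add of zero, i.e. it returns the
// current contents of *loc without modifying them, so e.g.
// __kmpc_atomic_fixed4_rd is effectively an atomic 32-bit load.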
1985 // -------------------------------------------------------------------------
1986 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1987  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1988  TYPE new_value; \
1989  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1990  OP_CMPXCHG_READ(TYPE, BITS, OP) \
1991  }
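// For illustration only (a sketch, not compiled here): assuming the
// GOMP-compat branch is disabled, the debug assert/trace elided, and
// VOLATILE_CAST written as a plain cast,
// ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, ...) expands to roughly:
//
//   kmp_real64 __kmpc_atomic_float8_rd(ident_t *id_ref, int gtid,
//                                      kmp_real64 *loc) {
//     kmp_real64 new_value;
//     union f_i_union { kmp_real64 f_val; kmp_int64 i_val; };
//     union f_i_union old_value;
//     kmp_real64 KMP_ATOMIC_VOLATILE temp_val = *loc; // read *loc once
//     old_value.f_val = temp_val;
//     // A CAS whose "new" equals "old" never changes *loc, but it does
//     // return the current 64-bit contents atomically.
//     old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//         (kmp_int64 *)loc, old_value.i_val, old_value.i_val);
//     new_value = old_value.f_val;
//     return new_value;
//   }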
1992 // ------------------------------------------------------------------------
1993 // Routines for Extended types: long double, _Quad, complex flavours (use
1994 // critical section)
1995 // TYPE_ID, OP_ID, TYPE - detailed above
1996 // OP - operator
1997 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1998 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1999  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2000  TYPE new_value; \
2001  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2002  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2003  return new_value; \
2004  }
2005 
2006 // ------------------------------------------------------------------------
2007 // Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
2008 // return value doesn't work, so the read value is returned through an
2009 // additional parameter instead.
2010 #if (KMP_OS_WINDOWS)
2011 
2012 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2013  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2014  \
2015  (*out) = (*loc); \
2016  \
2017  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2018 // ------------------------------------------------------------------------
2019 #ifdef KMP_GOMP_COMPAT
2020 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2021  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2022  KMP_CHECK_GTID; \
2023  OP_CRITICAL_READ_WRK(OP, 0); \
2024  }
2025 #else
2026 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2027 #endif /* KMP_GOMP_COMPAT */
2028 // ------------------------------------------------------------------------
2029 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2030  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2031  TYPE *loc) { \
2032  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2033  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2034 
2035 // ------------------------------------------------------------------------
2036 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2037  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2038  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2039  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2040  }
2041 
2042 #endif // KMP_OS_WINDOWS
2043 
2044 // ------------------------------------------------------------------------
2045 // TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
2046 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2047 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2048  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2049 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2050  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2051 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2052  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2053 
2054 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2055 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2056  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2057 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2058  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2059 
2060 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2061  1) // __kmpc_atomic_float10_rd
2062 #if KMP_HAVE_QUAD
2063 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2064  1) // __kmpc_atomic_float16_rd
2065 #endif // KMP_HAVE_QUAD
2066 
2067 // Fix for CQ220361 on Windows* OS
2068 #if (KMP_OS_WINDOWS)
2069 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2070  1) // __kmpc_atomic_cmplx4_rd
2071 #else
2072 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2073  1) // __kmpc_atomic_cmplx4_rd
2074 #endif // (KMP_OS_WINDOWS)
2075 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2076  1) // __kmpc_atomic_cmplx8_rd
2077 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2078  1) // __kmpc_atomic_cmplx10_rd
2079 #if KMP_HAVE_QUAD
2080 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2081  1) // __kmpc_atomic_cmplx16_rd
2082 #if (KMP_ARCH_X86)
2083 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2084  1) // __kmpc_atomic_float16_a16_rd
2085 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2086  1) // __kmpc_atomic_cmplx16_a16_rd
2087 #endif // (KMP_ARCH_X86)
2088 #endif // KMP_HAVE_QUAD
2089 
2090 // ------------------------------------------------------------------------
2091 // Atomic WRITE routines
2092 
2093 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2094  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2095  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2096  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2097  }
2098 // ------------------------------------------------------------------------
2099 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2100  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2101  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2102  KMP_XCHG_REAL##BITS(lhs, rhs); \
2103  }
2104 
2105 // ------------------------------------------------------------------------
2106 // Operation on *lhs, rhs using "compare_and_store" routine
2107 // TYPE - operands' type
2108 // BITS - size in bits, used to distinguish low level calls
2109 // OP - operator
2110 // Note: temp_val introduced in order to force the compiler to read
2111 // *lhs only once (w/o it the compiler reads *lhs twice)
2112 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2113  { \
2114  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2115  TYPE old_value, new_value; \
2116  temp_val = *lhs; \
2117  old_value = temp_val; \
2118  new_value = rhs; \
2119  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2120  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2121  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2122  temp_val = *lhs; \
2123  old_value = temp_val; \
2124  new_value = rhs; \
2125  } \
2126  }
2127 
2128 // -------------------------------------------------------------------------
2129 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2130  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2131  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2132  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2133  }
2134 
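// For illustration only (a sketch, not compiled here): on IA-32, where the
// 8-byte write entries below use ATOMIC_CMPXCHG_WR, the body is a CAS retry
// loop. Assuming ATOMIC_BEGIN supplies the usual
// (ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs) parameter list and writing
// VOLATILE_CAST as a plain cast, __kmpc_atomic_float8_wr is roughly:
//
//   void __kmpc_atomic_float8_wr(ident_t *id_ref, int gtid,
//                                kmp_real64 *lhs, kmp_real64 rhs) {
//     kmp_real64 KMP_ATOMIC_VOLATILE temp_val;
//     kmp_real64 old_value, new_value;
//     temp_val = *lhs; // snapshot *lhs exactly once per attempt
//     old_value = temp_val;
//     new_value = rhs;
//     // retry until the full 64-bit value is replaced atomically
//     while (!KMP_COMPARE_AND_STORE_ACQ64(
//         (kmp_int64 *)lhs, *(kmp_int64 *)&old_value,
//         *(kmp_int64 *)&new_value)) {
//       temp_val = *lhs;
//       old_value = temp_val;
//       new_value = rhs;
//     }
//   }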
2135 // ------------------------------------------------------------------------
2136 // Routines for Extended types: long double, _Quad, complex flavours (use
2137 // critical section)
2138 // TYPE_ID, OP_ID, TYPE - detailed above
2139 // OP - operator
2140 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2141 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2142  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2143  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2144  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2145  }
2146 // -------------------------------------------------------------------------
2147 
2148 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2149  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2150 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2151  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2152 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2153  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2154 #if (KMP_ARCH_X86)
2155 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2156  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2157 #else
2158 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2159  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2160 #endif // (KMP_ARCH_X86)
2161 
2162 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2163  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2164 #if (KMP_ARCH_X86)
2165 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2166  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2167 #else
2168 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2169  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2170 #endif // (KMP_ARCH_X86)
2171 
2172 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2173  1) // __kmpc_atomic_float10_wr
2174 #if KMP_HAVE_QUAD
2175 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2176  1) // __kmpc_atomic_float16_wr
2177 #endif // KMP_HAVE_QUAD
2178 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2179 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2180  1) // __kmpc_atomic_cmplx8_wr
2181 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2182  1) // __kmpc_atomic_cmplx10_wr
2183 #if KMP_HAVE_QUAD
2184 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2185  1) // __kmpc_atomic_cmplx16_wr
2186 #if (KMP_ARCH_X86)
2187 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2188  1) // __kmpc_atomic_float16_a16_wr
2189 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2190  1) // __kmpc_atomic_cmplx16_a16_wr
2191 #endif // (KMP_ARCH_X86)
2192 #endif // KMP_HAVE_QUAD
2193 
2194 // ------------------------------------------------------------------------
2195 // Atomic CAPTURE routines
2196 
2197 // Beginning of a definition (provides name, parameters, debug trace)
2198 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
2199 // unsigned fixed types)
2200 // OP_ID - operation identifier (add, sub, mul, ...)
2201 // TYPE - operands' type
2202 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2203  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2204  TYPE *lhs, TYPE rhs, int flag) { \
2205  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2206  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2207 
2208 // -------------------------------------------------------------------------
2209 // Operation on *lhs, rhs bound by critical section
2210 // OP - operator (it's supposed to contain an assignment)
2211 // LCK_ID - lock identifier
2212 // Note: don't check gtid as it should always be valid
2213 // For 1- and 2-byte operands a valid gtid is expected; other types check it before this macro
2214 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2215  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2216  \
2217  if (flag) { \
2218  (*lhs) OP rhs; \
2219  new_value = (*lhs); \
2220  } else { \
2221  new_value = (*lhs); \
2222  (*lhs) OP rhs; \
2223  } \
2224  \
2225  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2226  return new_value;
2227 
2228 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2229  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2230  \
2231  if (flag) { \
2232  (*lhs) = (TYPE)((*lhs)OP rhs); \
2233  new_value = (*lhs); \
2234  } else { \
2235  new_value = (*lhs); \
2236  (*lhs) = (TYPE)((*lhs)OP rhs); \
2237  } \
2238  \
2239  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2240  return new_value;
2241 
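// Worked example of the 'flag' semantics above (values chosen only for
// illustration): with *lhs == 10, rhs == 3 and OP '+', flag != 0 updates
// first and captures the result (returns 13, *lhs becomes 13), while
// flag == 0 captures the old value first (returns 10, *lhs still becomes 13).
// This mirrors the OpenMP capture forms { x = x + expr; v = x; } and
// { v = x; x = x + expr; }.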
2242 // ------------------------------------------------------------------------
2243 #ifdef KMP_GOMP_COMPAT
2244 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2245  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2246  KMP_CHECK_GTID; \
2247  OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2248  }
2249 #else
2250 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2251 #endif /* KMP_GOMP_COMPAT */
2252 
2253 // ------------------------------------------------------------------------
2254 // Operation on *lhs, rhs using "compare_and_store" routine
2255 // TYPE - operands' type
2256 // BITS - size in bits, used to distinguish low level calls
2257 // OP - operator
2258 // Note: temp_val introduced in order to force the compiler to read
2259 // *lhs only once (w/o it the compiler reads *lhs twice)
2260 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2261  { \
2262  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2263  TYPE old_value, new_value; \
2264  temp_val = *lhs; \
2265  old_value = temp_val; \
2266  new_value = (TYPE)(old_value OP rhs); \
2267  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2268  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2269  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2270  temp_val = *lhs; \
2271  old_value = temp_val; \
2272  new_value = (TYPE)(old_value OP rhs); \
2273  } \
2274  if (flag) { \
2275  return new_value; \
2276  } else \
2277  return old_value; \
2278  }
2279 
2280 // -------------------------------------------------------------------------
2281 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2282  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2283  TYPE new_value; \
2284  (void)new_value; \
2285  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2286  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2287  }
2288 
2289 // -------------------------------------------------------------------------
2290 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2291  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2292  TYPE old_value, new_value; \
2293  (void)new_value; \
2294  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2295  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2296  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2297  if (flag) { \
2298  return old_value OP rhs; \
2299  } else \
2300  return old_value; \
2301  }
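// For illustration only (a sketch, not compiled here): with the GOMP branch
// disabled and the debug trace elided,
// ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 0) expands to
// roughly:
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, rhs); // atomic fetch-add
//     return flag ? old_value + rhs // capture the value after the update
//                 : old_value;      // capture the value before the update
//   }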
2302 // -------------------------------------------------------------------------
2303 
2304 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2305  0) // __kmpc_atomic_fixed4_add_cpt
2306 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2307  0) // __kmpc_atomic_fixed4_sub_cpt
2308 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2309  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2310 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2311  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
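// Hedged usage sketch (the lowering shown is an assumption for illustration,
// not taken from this file or from any particular compiler): for
//   #pragma omp atomic capture
//   { v = x; x = x + expr; }
// with a 32-bit int x, a compiler targeting these entry points could emit a
// call along the lines of
//   v = __kmpc_atomic_fixed4_add_cpt(loc_ref, gtid, &x, expr, /*flag=*/0);
// where loc_ref/gtid are the usual ident_t*/thread-id arguments and flag == 1
// would instead request the post-update value, matching the
// { x = x + expr; v = x; } form.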
2312 
2313 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2314  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2315 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2316  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2317 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2318  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2319 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2320  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2321 
2322 // ------------------------------------------------------------------------
2323 // Entry definitions for integer operands
2324 // TYPE_ID - operands' type and size (fixed4, float4)
2325 // OP_ID - operation identifier (add, sub, mul, ...)
2326 // TYPE - operand type
2327 // BITS - size in bits, used to distinguish low level calls
2328 // OP - operator (used in critical section)
2329 // TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2330 // ------------------------------------------------------------------------
2331 // Routines for ATOMIC integer operands, other operators
2332 // ------------------------------------------------------------------------
2333 // TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
2334 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2335  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2337  0) // __kmpc_atomic_fixed1_andb_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2339  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2341  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2343  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2345  0) // __kmpc_atomic_fixed1_orb_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2347  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2349  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2351  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2353  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2355  0) // __kmpc_atomic_fixed1_xor_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2357  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2359  0) // __kmpc_atomic_fixed2_andb_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2361  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2363  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2365  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2367  0) // __kmpc_atomic_fixed2_orb_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2369  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2371  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2373  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2375  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2377  0) // __kmpc_atomic_fixed2_xor_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2379  0) // __kmpc_atomic_fixed4_andb_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2381  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2383  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2384 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2385  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2386 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2387  0) // __kmpc_atomic_fixed4_orb_cpt
2388 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2389  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2390 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2391  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2392 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2393  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2394 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2395  0) // __kmpc_atomic_fixed4_xor_cpt
2396 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2397  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2398 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2399  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2400 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2401  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2402 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2403  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2404 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2405  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2406 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2407  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2408 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2409  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2410 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2411  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2412 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2413  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2414 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2415  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2416 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2417  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2418 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2419  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2420 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2421  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2422 // TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
2423 
2424 // CAPTURE routines for mixed types RHS=float16
2425 #if KMP_HAVE_QUAD
2426 
2427 // Beginning of a definition (provides name, parameters, debug trace)
2428 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
2429 // unsigned fixed types)
2430 // OP_ID - operation identifier (add, sub, mul, ...)
2431 // TYPE - operands' type
2432 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2433  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2434  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2435  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2436  KA_TRACE(100, \
2437  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2438  gtid));
2439 
2440 // -------------------------------------------------------------------------
2441 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2442  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2443  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2444  TYPE new_value; \
2445  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2446  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2447  }
2448 
2449 // -------------------------------------------------------------------------
2450 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2451  LCK_ID, GOMP_FLAG) \
2452  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2453  TYPE new_value; \
2454  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2455  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2456  }
2457 
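// Hedged note on the mixed-type capture entries below: the RHS is a _Quad
// while *lhs keeps its own type, e.g. the generated prototype is
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid,
//                                             kmp_int32 *lhs, _Quad rhs,
//                                             int flag);
// Inside OP_CMPXCHG_CPT the expression old_value + rhs is evaluated in _Quad
// precision and then cast back to the LHS type before the compare-and-store.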
2458 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2459  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2460 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2461  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2462 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2463  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2464 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2465  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2467  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2469  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2471  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2473  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2474 
2475 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2476  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2477 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2478  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2479 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2480  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2482  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2483 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2484  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2485 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2486  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2487 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2488  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2489 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2490  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2491 
2492 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2493  0) // __kmpc_atomic_fixed4_add_cpt_fp
2494 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2495  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2496 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2497  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2498 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2499  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2500 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2501  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2502 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2503  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2504 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2505  0) // __kmpc_atomic_fixed4_div_cpt_fp
2506 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2507  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2508 
2509 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2510  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2511 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2512  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2513 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2514  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2515 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2516  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2517 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2518  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2519 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2520  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2521 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2522  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2523 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2524  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2525 
2526 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2527  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2528 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2529  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2530 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2531  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2532 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2533  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2534 
2535 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2536  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2537 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2538  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2539 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2540  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2541 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2542  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2543 
2544 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2545  1) // __kmpc_atomic_float10_add_cpt_fp
2546 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2547  1) // __kmpc_atomic_float10_sub_cpt_fp
2548 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2549  1) // __kmpc_atomic_float10_mul_cpt_fp
2550 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2551  1) // __kmpc_atomic_float10_div_cpt_fp
2552 
2553 #endif // KMP_HAVE_QUAD
2554 
2555 // ------------------------------------------------------------------------
2556 // Routines for C/C++ Reduction operators && and ||
2557 
2558 // -------------------------------------------------------------------------
2559 // Operation on *lhs, rhs bound by critical section
2560 // OP - operator (it's supposed to contain an assignment)
2561 // LCK_ID - lock identifier
2562 // Note: don't check gtid as it should always be valid
2563 // For 1- and 2-byte operands a valid gtid is expected; other types check it before this macro
2564 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2565  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2566  \
2567  if (flag) { \
2568  new_value OP rhs; \
2569  (*lhs) = new_value; \
2570  } else { \
2571  new_value = (*lhs); \
2572  (*lhs) OP rhs; \
2573  } \
2574  \
2575  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2576 
2577 // ------------------------------------------------------------------------
2578 #ifdef KMP_GOMP_COMPAT
2579 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2580  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2581  KMP_CHECK_GTID; \
2582  OP_CRITICAL_L_CPT(OP, 0); \
2583  return new_value; \
2584  }
2585 #else
2586 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2587 #endif /* KMP_GOMP_COMPAT */
2588 
2589 // ------------------------------------------------------------------------
2590 // Need separate macros for &&, || because there is no combined assignment
2591 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2592  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2593  TYPE new_value; \
2594  (void)new_value; \
2595  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2596  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2597  }
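// Hedged reading of the '= *lhs OP' argument above: since C has no combined
// assignment for && or ||, the GOMP critical path is handed the token
// sequence '= *lhs &&' (or '= *lhs ||'), so OP_CRITICAL_L_CPT expands to,
// e.g. for the flag != 0 branch,
//   new_value = *lhs && rhs;
//   (*lhs) = new_value;
// while the lock-free path simply uses the plain operator in OP_CMPXCHG_CPT.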
2598 
2599 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2600  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2601 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2602  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2603 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2604  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2605 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2606  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2607 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2608  0) // __kmpc_atomic_fixed4_andl_cpt
2609 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2610  0) // __kmpc_atomic_fixed4_orl_cpt
2611 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2612  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2613 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2614  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2615 
2616 // -------------------------------------------------------------------------
2617 // Routines for Fortran operators that have no direct C counterpart:
2618 // MAX, MIN, .EQV., .NEQV.
2619 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2620 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2621 
2622 // -------------------------------------------------------------------------
2623 // MIN and MAX need separate macros
2624 // OP - comparison operator used to check whether any action is needed
2625 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2626  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2627  \
2628  if (*lhs OP rhs) { /* still need actions? */ \
2629  old_value = *lhs; \
2630  *lhs = rhs; \
2631  if (flag) \
2632  new_value = rhs; \
2633  else \
2634  new_value = old_value; \
2635  } else { \
2636  new_value = *lhs; \
2637  } \
2638  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2639  return new_value;
2640 
2641 // -------------------------------------------------------------------------
2642 #ifdef KMP_GOMP_COMPAT
2643 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2644  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2645  KMP_CHECK_GTID; \
2646  MIN_MAX_CRITSECT_CPT(OP, 0); \
2647  }
2648 #else
2649 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2650 #endif /* KMP_GOMP_COMPAT */
2651 
2652 // -------------------------------------------------------------------------
2653 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2654  { \
2655  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2656  /*TYPE old_value; */ \
2657  temp_val = *lhs; \
2658  old_value = temp_val; \
2659  while (old_value OP rhs && /* still need actions? */ \
2660  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2661  (kmp_int##BITS *)lhs, \
2662  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2663  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2664  temp_val = *lhs; \
2665  old_value = temp_val; \
2666  } \
2667  if (flag) \
2668  return rhs; \
2669  else \
2670  return old_value; \
2671  }
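// Worked reading of the loop above, using max_cpt (OP is '<') as an example:
// the compare-and-store is attempted only while the stored value is still
// less than rhs, i.e. while the new maximum still has to be installed; once
// *lhs is already >= rhs no store is attempted. When the macro finishes,
// flag != 0 makes the routine return rhs and flag == 0 makes it return the
// value read before the exchange.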
2672 
2673 // -------------------------------------------------------------------------
2674 // 1-byte, 2-byte operands - use critical section
2675 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2676  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2677  TYPE new_value, old_value; \
2678  if (*lhs OP rhs) { /* need actions? */ \
2679  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2680  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2681  } \
2682  return *lhs; \
2683  }
2684 
2685 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2686  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2687  TYPE new_value, old_value; \
2688  (void)new_value; \
2689  if (*lhs OP rhs) { \
2690  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2691  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2692  } \
2693  return *lhs; \
2694  }
2695 
2696 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2697  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2698 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2699  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2700 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2701  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2702 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2703  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2704 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2705  0) // __kmpc_atomic_fixed4_max_cpt
2706 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2707  0) // __kmpc_atomic_fixed4_min_cpt
2708 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2709  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2710 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2711  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2712 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2713  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2714 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2715  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2716 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2717  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2718 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2719  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2720 #if KMP_HAVE_QUAD
2721 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2722  1) // __kmpc_atomic_float16_max_cpt
2723 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2724  1) // __kmpc_atomic_float16_min_cpt
2725 #if (KMP_ARCH_X86)
2726 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2727  1) // __kmpc_atomic_float16_max_a16_cpt
2728 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2729  1) // __kmpc_atomic_float16_min_a16_cpt
2730 #endif // (KMP_ARCH_X86)
2731 #endif // KMP_HAVE_QUAD
2732 
2733 // ------------------------------------------------------------------------
2734 #ifdef KMP_GOMP_COMPAT
2735 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2736  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2737  KMP_CHECK_GTID; \
2738  OP_CRITICAL_CPT(OP, 0); \
2739  }
2740 #else
2741 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2742 #endif /* KMP_GOMP_COMPAT */
2743 // ------------------------------------------------------------------------
2744 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2745  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2746  TYPE new_value; \
2747  (void)new_value; \
2748  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2749  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2750  }
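// Hedged note on the '^~' operator token used for .EQV. below: pasted into
// OP_CMPXCHG_CPT it reads as  new_value = old_value ^ ~rhs,  which is the
// bitwise equivalence ~(old_value ^ rhs). For example, with kmp_int8
// operands, 0x0A .EQV. 0x0C yields 0xF9 (bits that agree become 1, bits that
// differ become 0).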
2751 
2752 // ------------------------------------------------------------------------
2753 
2754 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2755  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2756 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2757  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2758 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2759  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2760 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2761  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2762 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2763  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2764 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2765  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2766 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2767  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2768 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2769  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2770 
2771 // ------------------------------------------------------------------------
2772 // Routines for Extended types: long double, _Quad, complex flavours (use
2773 // critical section)
2774 // TYPE_ID, OP_ID, TYPE - detailed above
2775 // OP - operator
2776 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2777 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2778  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2779  TYPE new_value; \
2780  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2781  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2782  }
2783 
2784 // ------------------------------------------------------------------------
2785 // Workaround for cmplx4. Regular routines with a return value don't work
2786 // on Win_32e, so the captured value is returned through an additional parameter.
2787 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2788  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2789  \
2790  if (flag) { \
2791  (*lhs) OP rhs; \
2792  (*out) = (*lhs); \
2793  } else { \
2794  (*out) = (*lhs); \
2795  (*lhs) OP rhs; \
2796  } \
2797  \
2798  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2799  return;
2800 // ------------------------------------------------------------------------
2801 
2802 #ifdef KMP_GOMP_COMPAT
2803 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2804  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2805  KMP_CHECK_GTID; \
2806  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2807  }
2808 #else
2809 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2810 #endif /* KMP_GOMP_COMPAT */
2811 // ------------------------------------------------------------------------
2812 
2813 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2814  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2815  TYPE rhs, TYPE *out, int flag) { \
2816  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2817  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2818 // ------------------------------------------------------------------------
2819 
2820 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2821  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2822  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2823  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2824  }
2825 // The end of workaround for cmplx4
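// Hedged sketch of the resulting cmplx4 capture prototype (the captured
// value comes back through 'out' rather than a return value):
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);
// A caller is expected to read the captured value from *out after the call.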
2826 
2827 /* ------------------------------------------------------------------------- */
2828 // routines for long double type
2829 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2830  1) // __kmpc_atomic_float10_add_cpt
2831 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2832  1) // __kmpc_atomic_float10_sub_cpt
2833 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2834  1) // __kmpc_atomic_float10_mul_cpt
2835 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2836  1) // __kmpc_atomic_float10_div_cpt
2837 #if KMP_HAVE_QUAD
2838 // routines for _Quad type
2839 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2840  1) // __kmpc_atomic_float16_add_cpt
2841 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2842  1) // __kmpc_atomic_float16_sub_cpt
2843 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2844  1) // __kmpc_atomic_float16_mul_cpt
2845 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2846  1) // __kmpc_atomic_float16_div_cpt
2847 #if (KMP_ARCH_X86)
2848 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2849  1) // __kmpc_atomic_float16_add_a16_cpt
2850 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2851  1) // __kmpc_atomic_float16_sub_a16_cpt
2852 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2853  1) // __kmpc_atomic_float16_mul_a16_cpt
2854 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2855  1) // __kmpc_atomic_float16_div_a16_cpt
2856 #endif // (KMP_ARCH_X86)
2857 #endif // KMP_HAVE_QUAD
2858 
2859 // routines for complex types
2860 
2861 // cmplx4 routines to return void
2862 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2863  1) // __kmpc_atomic_cmplx4_add_cpt
2864 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2865  1) // __kmpc_atomic_cmplx4_sub_cpt
2866 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2867  1) // __kmpc_atomic_cmplx4_mul_cpt
2868 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2869  1) // __kmpc_atomic_cmplx4_div_cpt
2870 
2871 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2872  1) // __kmpc_atomic_cmplx8_add_cpt
2873 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2874  1) // __kmpc_atomic_cmplx8_sub_cpt
2875 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2876  1) // __kmpc_atomic_cmplx8_mul_cpt
2877 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2878  1) // __kmpc_atomic_cmplx8_div_cpt
2879 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2880  1) // __kmpc_atomic_cmplx10_add_cpt
2881 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2882  1) // __kmpc_atomic_cmplx10_sub_cpt
2883 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2884  1) // __kmpc_atomic_cmplx10_mul_cpt
2885 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2886  1) // __kmpc_atomic_cmplx10_div_cpt
2887 #if KMP_HAVE_QUAD
2888 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2889  1) // __kmpc_atomic_cmplx16_add_cpt
2890 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2891  1) // __kmpc_atomic_cmplx16_sub_cpt
2892 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2893  1) // __kmpc_atomic_cmplx16_mul_cpt
2894 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2895  1) // __kmpc_atomic_cmplx16_div_cpt
2896 #if (KMP_ARCH_X86)
2897 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2898  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2899 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2900  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2901 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2902  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2903 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2904  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2905 #endif // (KMP_ARCH_X86)
2906 #endif // KMP_HAVE_QUAD
2907 
2908 // OpenMP 4.0 reverse forms (non-commutative ops): v = x = expr binop x;
2909 // { v = x; x = expr binop x; } and { x = expr binop x; v = x; }
2910 // Supported only on IA-32 architecture and Intel(R) 64
2911 
2912 // -------------------------------------------------------------------------
2913 // Operation on *lhs, rhs bound by critical section
2914 // OP - operator (it's supposed to contain an assignment)
2915 // LCK_ID - lock identifier
2916 // Note: don't check gtid as it should always be valid
2917 // For 1- and 2-byte operands a valid gtid is expected; other types check it before this macro
2918 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2919  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2920  \
2921  if (flag) { \
2922  /*temp_val = (*lhs);*/ \
2923  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2924  new_value = (*lhs); \
2925  } else { \
2926  new_value = (*lhs); \
2927  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2928  } \
2929  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2930  return new_value;
2931 
2932 // ------------------------------------------------------------------------
2933 #ifdef KMP_GOMP_COMPAT
2934 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2935  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2936  KMP_CHECK_GTID; \
2937  OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2938  }
2939 #else
2940 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2941 #endif /* KMP_GOMP_COMPAT */
2942 
2943 // ------------------------------------------------------------------------
2944 // Operation on *lhs, rhs using "compare_and_store" routine
2945 // TYPE - operands' type
2946 // BITS - size in bits, used to distinguish low level calls
2947 // OP - operator
2948 // Note: temp_val introduced in order to force the compiler to read
2949 // *lhs only once (w/o it the compiler reads *lhs twice)
2950 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2951  { \
2952  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2953  TYPE old_value, new_value; \
2954  temp_val = *lhs; \
2955  old_value = temp_val; \
2956  new_value = (TYPE)(rhs OP old_value); \
2957  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2958  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2959  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2960  temp_val = *lhs; \
2961  old_value = temp_val; \
2962  new_value = (TYPE)(rhs OP old_value); \
2963  } \
2964  if (flag) { \
2965  return new_value; \
2966  } else \
2967  return old_value; \
2968  }
2969 
2970 // -------------------------------------------------------------------------
2971 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2972  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2973  TYPE new_value; \
2974  (void)new_value; \
2975  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2976  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2977  }
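// Worked example for the reversed forms generated below:
// __kmpc_atomic_fixed4_sub_cpt_rev implements x = rhs - x (operands swapped
// relative to sub_cpt). With *lhs == 3 and rhs == 10 the CAS loop installs 7;
// flag != 0 captures 7, flag == 0 captures the original 3.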
2978 
2979 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2980  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2981 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2982  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2983 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2984  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2985 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2986  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2987 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2988  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2989 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2990  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2991 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2992  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2993 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2994  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2995 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2996  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2997 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2998  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2999 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3000  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3001 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3002  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3003 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3004  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3005 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3006  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3007 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3008  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3009 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3010  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3011 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3012  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3013 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3014  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3015 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3016  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3017 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3018  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3019 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3020  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3021 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3022  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3023 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3024  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3025 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3026  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3027 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3028  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3029 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3030  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3031 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3032  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3033 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3034  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3035 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
3036 
3037 // ------------------------------------------------------------------------
3038 // Routines for Extended types: long double, _Quad, complex flavours (use
3039 // critical section)
3040 // TYPE_ID, OP_ID, TYPE - detailed above
3041 // OP - operator
3042 // LCK_ID - lock identifier, used to possibly distinguish lock variable
3043 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3044  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3045  TYPE new_value; \
3046  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3047  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3048  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3049  }
3050 
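// Illustrative shape (not compiled code): an instantiation such as
// ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 1)
// should yield roughly:
//   long double __kmpc_atomic_float10_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                                 long double *lhs,
//                                                 long double rhs, int flag) {
//     // acquire the per-type lock, compute *lhs = rhs - *lhs, capture the
//     // value after (flag != 0) or before (flag == 0) the update, release
//     // the lock, and return the captured value
//   }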
3051 /* ------------------------------------------------------------------------- */
3052 // routines for long double type
3053 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3054  1) // __kmpc_atomic_float10_sub_cpt_rev
3055 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3056  1) // __kmpc_atomic_float10_div_cpt_rev
3057 #if KMP_HAVE_QUAD
3058 // routines for _Quad type
3059 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3060  1) // __kmpc_atomic_float16_sub_cpt_rev
3061 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3062  1) // __kmpc_atomic_float16_div_cpt_rev
3063 #if (KMP_ARCH_X86)
3064 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3065  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3066 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3067  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3068 #endif // (KMP_ARCH_X86)
3069 #endif // KMP_HAVE_QUAD
3070 
3071 // routines for complex types
3072 
3073 // ------------------------------------------------------------------------
3074 // Workaround for cmplx4. Regular routines with return value don't work
3075 // on Win_32e. Let's return captured values through the additional parameter.
3076 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3077  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3078  \
3079  if (flag) { \
3080  (*lhs) = (rhs)OP(*lhs); \
3081  (*out) = (*lhs); \
3082  } else { \
3083  (*out) = (*lhs); \
3084  (*lhs) = (rhs)OP(*lhs); \
3085  } \
3086  \
3087  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3088  return;
3089 // ------------------------------------------------------------------------
3090 
3091 #ifdef KMP_GOMP_COMPAT
3092 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3093  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3094  KMP_CHECK_GTID; \
3095  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3096  }
3097 #else
3098 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3099 #endif /* KMP_GOMP_COMPAT */
3100 // ------------------------------------------------------------------------
3101 
3102 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3103  GOMP_FLAG) \
3104  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3105  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3106  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3107  }
3108 // The end of workaround for cmplx4
3109 
3110 // !!! TODO: check if we need to return void for cmplx4 routines
3111 // cmplx4 routines to return void
3112 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3113  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3114 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3115  1) // __kmpc_atomic_cmplx4_div_cpt_rev
3116 
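// For reference, the cmplx4 workaround above is expected to produce void
// routines that hand the captured value back through *out, roughly:
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);
// (see kmp_atomic.h for the authoritative prototypes).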
3117 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3118  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3119 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3120  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3121 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3122  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3123 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3124  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3125 #if KMP_HAVE_QUAD
3126 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3127  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3128 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3129  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3130 #if (KMP_ARCH_X86)
3131 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3132  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3133 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3134  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3135 #endif // (KMP_ARCH_X86)
3136 #endif // KMP_HAVE_QUAD
3137 
3138 // Capture reverse for mixed type: RHS=float16
3139 #if KMP_HAVE_QUAD
3140 
3141 // Beginning of a definition (provides name, parameters, debug trace)
3142 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3143 // fixed)
3144 // OP_ID - operation identifier (add, sub, mul, ...)
3145 // TYPE - operands' type
3146 // -------------------------------------------------------------------------
3147 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3148  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3149  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3150  TYPE new_value; \
3151  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3152  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3153  }
3154 
3155 // -------------------------------------------------------------------------
3156 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3157  LCK_ID, GOMP_FLAG) \
3158  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3159  TYPE new_value; \
3160  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3161  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3162  }
3163 
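// For reference, a mixed-type expansion such as
// ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp,
//                            _Quad, 8r, 7, KMP_ARCH_X86)
// is expected to produce roughly:
//   kmp_real64 __kmpc_atomic_float8_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                                  kmp_real64 *lhs, _Quad rhs,
//                                                  int flag);
// i.e. the RHS operand is a _Quad while the location keeps its native type.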
3164 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3165  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3166 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3167  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3168 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3169  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3170 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3171  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3172 
3173 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3174  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3175 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3176  1,
3177  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3178 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3179  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3180 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3181  1,
3182  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3183 
3184 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3185  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3186 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3187  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3188 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3189  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3190 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3191  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3192 
3193 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3194  7,
3195  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3196 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3197  8i, 7,
3198  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3199 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3200  7,
3201  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3202 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3203  8i, 7,
3204  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3205 
3206 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3207  4r, 3,
3208  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3209 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3210  4r, 3,
3211  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3212 
3213 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3214  8r, 7,
3215  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3216 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3217  8r, 7,
3218  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3219 
3220 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3221  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3222 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3223  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3224 
3225 #endif // KMP_HAVE_QUAD
3226 
3227 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3228 
3229 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3230  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3231  TYPE rhs) { \
3232  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3233  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3234 
3235 #define CRITICAL_SWP(LCK_ID) \
3236  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3237  \
3238  old_value = (*lhs); \
3239  (*lhs) = rhs; \
3240  \
3241  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3242  return old_value;
3243 
3244 // ------------------------------------------------------------------------
3245 #ifdef KMP_GOMP_COMPAT
3246 #define GOMP_CRITICAL_SWP(FLAG) \
3247  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3248  KMP_CHECK_GTID; \
3249  CRITICAL_SWP(0); \
3250  }
3251 #else
3252 #define GOMP_CRITICAL_SWP(FLAG)
3253 #endif /* KMP_GOMP_COMPAT */
3254 
3255 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3256  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3257  TYPE old_value; \
3258  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3259  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3260  return old_value; \
3261  }
3262 // ------------------------------------------------------------------------
3263 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3264  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3265  TYPE old_value; \
3266  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3267  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3268  return old_value; \
3269  }
3270 
3271 // ------------------------------------------------------------------------
3272 #define CMPXCHG_SWP(TYPE, BITS) \
3273  { \
3274  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3275  TYPE old_value, new_value; \
3276  temp_val = *lhs; \
3277  old_value = temp_val; \
3278  new_value = rhs; \
3279  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3280  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3281  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3282  temp_val = *lhs; \
3283  old_value = temp_val; \
3284  new_value = rhs; \
3285  } \
3286  return old_value; \
3287  }
3288 
3289 // -------------------------------------------------------------------------
3290 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3291  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3292  TYPE old_value; \
3293  (void)old_value; \
3294  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3295  CMPXCHG_SWP(TYPE, BITS) \
3296  }
3297 
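// For reference, a swap expansion such as
// ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) is expected to produce
// roughly:
//   kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid,
//                                      kmp_int32 *lhs, kmp_int32 rhs);
// which atomically stores rhs into *lhs and returns the previous value,
// implementing {v = x; x = expr;}.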
3298 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3299 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3300 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3301 
3302 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3303  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3304 
3305 #if (KMP_ARCH_X86)
3306 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3307  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3308 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3309  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3310 #else
3311 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3312 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3313  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3314 #endif // (KMP_ARCH_X86)
3315 
3316 // ------------------------------------------------------------------------
3317 // Routines for Extended types: long double, _Quad, complex flavours (use
3318 // critical section)
3319 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3320  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3321  TYPE old_value; \
3322  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3323  CRITICAL_SWP(LCK_ID) \
3324  }
3325 
3326 // ------------------------------------------------------------------------
3327 // !!! TODO: check if we need to return void for cmplx4 routines
3328 // Workaround for cmplx4. Regular routines with return value don't work
3329 // on Win_32e. Let's return captured values through the additional parameter.
3330 
3331 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3332  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3333  TYPE rhs, TYPE *out) { \
3334  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3335  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3336 
3337 #define CRITICAL_SWP_WRK(LCK_ID) \
3338  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3339  \
3340  tmp = (*lhs); \
3341  (*lhs) = (rhs); \
3342  (*out) = tmp; \
3343  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3344  return;
3345 // ------------------------------------------------------------------------
3346 
3347 #ifdef KMP_GOMP_COMPAT
3348 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3349  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3350  KMP_CHECK_GTID; \
3351  CRITICAL_SWP_WRK(0); \
3352  }
3353 #else
3354 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3355 #endif /* KMP_GOMP_COMPAT */
3356 // ------------------------------------------------------------------------
3357 
3358 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3359  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3360  TYPE tmp; \
3361  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3362  CRITICAL_SWP_WRK(LCK_ID) \
3363  }
3364 // The end of workaround for cmplx4
3365 
3366 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3367 #if KMP_HAVE_QUAD
3368 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3369 #endif // KMP_HAVE_QUAD
3370 // cmplx4 routine to return void
3371 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3372 
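// For reference, the cmplx4 swap workaround is expected to produce roughly:
//   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
//                                 kmp_cmplx32 rhs, kmp_cmplx32 *out);
// returning the previous value through *out instead of by value.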
3373 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3374 // __kmpc_atomic_cmplx4_swp
3375 
3376 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3377 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3378 #if KMP_HAVE_QUAD
3379 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3380 #if (KMP_ARCH_X86)
3381 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3382  1) // __kmpc_atomic_float16_a16_swp
3383 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3384  1) // __kmpc_atomic_cmplx16_a16_swp
3385 #endif // (KMP_ARCH_X86)
3386 #endif // KMP_HAVE_QUAD
3387 
3388 // End of OpenMP 4.0 Capture
3389 
3390 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3391 
3392 #undef OP_CRITICAL
3393 
3394 /* ------------------------------------------------------------------------ */
3395 /* Generic atomic routines */
3396 
3397 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3398  void (*f)(void *, void *, void *)) {
3399  KMP_DEBUG_ASSERT(__kmp_init_serial);
3400 
3401  if (
3402 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3403  FALSE /* must use lock */
3404 #else
3405  TRUE
3406 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3407  ) {
3408  kmp_int8 old_value, new_value;
3409 
3410  old_value = *(kmp_int8 *)lhs;
3411  (*f)(&new_value, &old_value, rhs);
3412 
3413  /* TODO: Should this be acquire or release? */
3414  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3415  *(kmp_int8 *)&new_value)) {
3416  KMP_CPU_PAUSE();
3417 
3418  old_value = *(kmp_int8 *)lhs;
3419  (*f)(&new_value, &old_value, rhs);
3420  }
3421 
3422  return;
3423  } else {
3424  // All 1-byte data is of integer data type.
3425 
3426 #ifdef KMP_GOMP_COMPAT
3427  if (__kmp_atomic_mode == 2) {
3428  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3429  } else
3430 #endif /* KMP_GOMP_COMPAT */
3431  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3432 
3433  (*f)(lhs, lhs, rhs);
3434 
3435 #ifdef KMP_GOMP_COMPAT
3436  if (__kmp_atomic_mode == 2) {
3437  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3438  } else
3439 #endif /* KMP_GOMP_COMPAT */
3440  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3441  }
3442 }
3443 
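// Usage sketch (hypothetical caller, not part of this file): the compiler
// outlines the operation into a callback f(out, a, b) that stores a OP b into
// *out, with a pointing at the captured left operand and b at the right
// operand, then calls the generic routine. For a 1-byte signed char add this
// might look like (the names add_char, loc, x, v are assumptions):
//   static void add_char(void *out, void *a, void *b) {
//     *(char *)out = (char)(*(char *)a + *(char *)b);
//   }
//   ...
//   __kmpc_atomic_1(&loc, gtid, &x, &v, add_char); // x, v are char lvalues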
3444 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3445  void (*f)(void *, void *, void *)) {
3446  if (
3447 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3448  FALSE /* must use lock */
3449 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3450  TRUE /* no alignment problems */
3451 #else
3452  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3453 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3454  ) {
3455  kmp_int16 old_value, new_value;
3456 
3457  old_value = *(kmp_int16 *)lhs;
3458  (*f)(&new_value, &old_value, rhs);
3459 
3460  /* TODO: Should this be acquire or release? */
3461  while (!KMP_COMPARE_AND_STORE_ACQ16(
3462  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3463  KMP_CPU_PAUSE();
3464 
3465  old_value = *(kmp_int16 *)lhs;
3466  (*f)(&new_value, &old_value, rhs);
3467  }
3468 
3469  return;
3470  } else {
3471  // All 2-byte data is of integer data type.
3472 
3473 #ifdef KMP_GOMP_COMPAT
3474  if (__kmp_atomic_mode == 2) {
3475  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3476  } else
3477 #endif /* KMP_GOMP_COMPAT */
3478  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3479 
3480  (*f)(lhs, lhs, rhs);
3481 
3482 #ifdef KMP_GOMP_COMPAT
3483  if (__kmp_atomic_mode == 2) {
3484  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3485  } else
3486 #endif /* KMP_GOMP_COMPAT */
3487  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3488  }
3489 }
3490 
3491 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3492  void (*f)(void *, void *, void *)) {
3493  KMP_DEBUG_ASSERT(__kmp_init_serial);
3494 
3495  if (
3496 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3497 // Gomp compatibility is broken if this routine is called for floats.
3498 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3499  TRUE /* no alignment problems */
3500 #else
3501  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3502 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3503  ) {
3504  kmp_int32 old_value, new_value;
3505 
3506  old_value = *(kmp_int32 *)lhs;
3507  (*f)(&new_value, &old_value, rhs);
3508 
3509  /* TODO: Should this be acquire or release? */
3510  while (!KMP_COMPARE_AND_STORE_ACQ32(
3511  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3512  KMP_CPU_PAUSE();
3513 
3514  old_value = *(kmp_int32 *)lhs;
3515  (*f)(&new_value, &old_value, rhs);
3516  }
3517 
3518  return;
3519  } else {
3520  // Use __kmp_atomic_lock_4i for all 4-byte data,
3521  // even if it isn't of integer data type.
3522 
3523 #ifdef KMP_GOMP_COMPAT
3524  if (__kmp_atomic_mode == 2) {
3525  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3526  } else
3527 #endif /* KMP_GOMP_COMPAT */
3528  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3529 
3530  (*f)(lhs, lhs, rhs);
3531 
3532 #ifdef KMP_GOMP_COMPAT
3533  if (__kmp_atomic_mode == 2) {
3534  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3535  } else
3536 #endif /* KMP_GOMP_COMPAT */
3537  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3538  }
3539 }
3540 
3541 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3542  void (*f)(void *, void *, void *)) {
3543  KMP_DEBUG_ASSERT(__kmp_init_serial);
3544  if (
3545 
3546 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3547  FALSE /* must use lock */
3548 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3549  TRUE /* no alignment problems */
3550 #else
3551  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3552 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3553  ) {
3554  kmp_int64 old_value, new_value;
3555 
3556  old_value = *(kmp_int64 *)lhs;
3557  (*f)(&new_value, &old_value, rhs);
3558  /* TODO: Should this be acquire or release? */
3559  while (!KMP_COMPARE_AND_STORE_ACQ64(
3560  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3561  KMP_CPU_PAUSE();
3562 
3563  old_value = *(kmp_int64 *)lhs;
3564  (*f)(&new_value, &old_value, rhs);
3565  }
3566 
3567  return;
3568  } else {
3569  // Use __kmp_atomic_lock_8i for all 8-byte data,
3570  // even if it isn't of integer data type.
3571 
3572 #ifdef KMP_GOMP_COMPAT
3573  if (__kmp_atomic_mode == 2) {
3574  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3575  } else
3576 #endif /* KMP_GOMP_COMPAT */
3577  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3578 
3579  (*f)(lhs, lhs, rhs);
3580 
3581 #ifdef KMP_GOMP_COMPAT
3582  if (__kmp_atomic_mode == 2) {
3583  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3584  } else
3585 #endif /* KMP_GOMP_COMPAT */
3586  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3587  }
3588 }
3589 
3590 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3591  void (*f)(void *, void *, void *)) {
3592  KMP_DEBUG_ASSERT(__kmp_init_serial);
3593 
3594 #ifdef KMP_GOMP_COMPAT
3595  if (__kmp_atomic_mode == 2) {
3596  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3597  } else
3598 #endif /* KMP_GOMP_COMPAT */
3599  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3600 
3601  (*f)(lhs, lhs, rhs);
3602 
3603 #ifdef KMP_GOMP_COMPAT
3604  if (__kmp_atomic_mode == 2) {
3605  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3606  } else
3607 #endif /* KMP_GOMP_COMPAT */
3608  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3609 }
3610 
3611 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3612  void (*f)(void *, void *, void *)) {
3613  KMP_DEBUG_ASSERT(__kmp_init_serial);
3614 
3615 #ifdef KMP_GOMP_COMPAT
3616  if (__kmp_atomic_mode == 2) {
3617  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3618  } else
3619 #endif /* KMP_GOMP_COMPAT */
3620  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3621 
3622  (*f)(lhs, lhs, rhs);
3623 
3624 #ifdef KMP_GOMP_COMPAT
3625  if (__kmp_atomic_mode == 2) {
3626  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3627  } else
3628 #endif /* KMP_GOMP_COMPAT */
3629  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3630 }
3631 
3632 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3633  void (*f)(void *, void *, void *)) {
3634  KMP_DEBUG_ASSERT(__kmp_init_serial);
3635 
3636 #ifdef KMP_GOMP_COMPAT
3637  if (__kmp_atomic_mode == 2) {
3638  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3639  } else
3640 #endif /* KMP_GOMP_COMPAT */
3641  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3642 
3643  (*f)(lhs, lhs, rhs);
3644 
3645 #ifdef KMP_GOMP_COMPAT
3646  if (__kmp_atomic_mode == 2) {
3647  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3648  } else
3649 #endif /* KMP_GOMP_COMPAT */
3650  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3651 }
3652 
3653 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3654  void (*f)(void *, void *, void *)) {
3655  KMP_DEBUG_ASSERT(__kmp_init_serial);
3656 
3657 #ifdef KMP_GOMP_COMPAT
3658  if (__kmp_atomic_mode == 2) {
3659  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3660  } else
3661 #endif /* KMP_GOMP_COMPAT */
3662  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3663 
3664  (*f)(lhs, lhs, rhs);
3665 
3666 #ifdef KMP_GOMP_COMPAT
3667  if (__kmp_atomic_mode == 2) {
3668  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3669  } else
3670 #endif /* KMP_GOMP_COMPAT */
3671  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3672 }
3673 
3674 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3675 // compiler; duplicated in order to not use third-party names in pure Intel code
3676 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3677 void __kmpc_atomic_start(void) {
3678  int gtid = __kmp_entry_gtid();
3679  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3680  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3681 }
3682 
3683 void __kmpc_atomic_end(void) {
3684  int gtid = __kmp_get_gtid();
3685  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3686  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3687 }
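// Usage sketch (hypothetical, emitted by the compiler rather than written by
// users): for an atomic form with no specialized routine, the update can be
// bracketed under the global atomic lock as:
//   __kmpc_atomic_start();
//   x = expr; // the unsupported atomic update
//   __kmpc_atomic_end();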
3688 
3693 // end of file