Intel® OpenMP* Runtime Library
kmp_atomic.c
1 /*
2  * kmp_atomic.c -- ATOMIC implementation routines
3  * $Revision: 42810 $
4  * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp_atomic.h"
38 #include "kmp.h" // TRUE, asm routines prototypes
39 
40 typedef unsigned char uchar;
41 typedef unsigned short ushort;
42 
560 /*
561  * Global vars
562  */
563 
564 #ifndef KMP_GOMP_COMPAT
565 int __kmp_atomic_mode = 1; // Intel perf
566 #else
567 int __kmp_atomic_mode = 2; // GOMP compatibility
568 #endif /* KMP_GOMP_COMPAT */
569 
570 KMP_ALIGN(128)
571 
572 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
573 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
574 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
575 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
576 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
577 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
578 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
579 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */
580 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
581 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
582 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
583 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
584 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
585 
586 
587 /*
588  2007-03-02:
589  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
590  bug on *_32 and *_32e. This is just a temporary workaround for the problem.
591  It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
592  routines in assembler language.
593 */
594 #define KMP_ATOMIC_VOLATILE volatile
595 
596 #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
597 
598  static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
599  static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
600  static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
601  static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
602  static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
603  static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
604 
605  static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
606  static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
607  static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
608  static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
609  static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
610  static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
611 
612  static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
613  static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
614  static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
615  static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
616 
617  static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
618  static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
619  static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
620  static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
621 
622 #endif
623 
624 /* ------------------------------------------------------------------------ */
625 /* ATOMIC implementation routines */
626 /* one routine for each operation and operand type */
627 /* ------------------------------------------------------------------------ */
628 
629 // All routine declarations look like
630 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
631 // ------------------------------------------------------------------------
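As an illustration of this calling convention (a caller-side sketch, not part of this file; the loc_descriptor and gtid handling are assumptions), a compiler lowering "#pragma omp atomic" on a 32-bit integer would emit a call to the matching entry point:

    // Hypothetical caller-side lowering of "#pragma omp atomic" (sketch only)
    extern ident_t loc_descriptor;      // assumed source-location descriptor
    int x;                              // assumed 32-bit int
    void update( int gtid ) {
        // x += 5;  under "#pragma omp atomic"
        __kmpc_atomic_fixed4_add( &loc_descriptor, gtid, &x, 5 );
    }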
632 
633 #define KMP_CHECK_GTID \
634  if ( gtid == KMP_GTID_UNKNOWN ) { \
635  gtid = __kmp_entry_gtid(); \
636  } // check and get gtid when needed
637 
638 // Beginning of a definition (provides name, parameters, debug trace)
639 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
640 // OP_ID - operation identifier (add, sub, mul, ...)
641 // TYPE - operands' type
642 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
643 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
644 { \
645  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
646  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
647 
648 // ------------------------------------------------------------------------
649 // Lock variables used for critical sections for various size operands
650 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
651 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
652 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
653 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
654 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
655 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
656 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
657 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
658 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
659 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
660 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
661 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
662 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
663 
664 // ------------------------------------------------------------------------
665 // Operation on *lhs, rhs bound by critical section
666 // OP - operator (it's supposed to contain an assignment)
667 // LCK_ID - lock identifier
668 // Note: don't check gtid as it should always be valid
669 // 1- and 2-byte routines expect a valid gtid here; others must check it before this macro
670 #define OP_CRITICAL(OP,LCK_ID) \
671  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
672  \
673  (*lhs) OP (rhs); \
674  \
675  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
676 
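For concreteness, OP_CRITICAL(+=, 4i) expands to roughly the following sequence (whitespace added; ATOMIC_LOCK4i is mapped above to __kmp_atomic_lock_4i):

    // Approximate expansion of OP_CRITICAL(+=, 4i):
    __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
    (*lhs) += (rhs);
    __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );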
677 // ------------------------------------------------------------------------
678 // For GNU compatibility, we may need to use a critical section,
679 // even though it is not required by the ISA.
680 //
681 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
682 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
683 // critical section. On Intel(R) 64, all atomic operations are done with fetch
684 // and add or compare and exchange. Therefore, the FLAG parameter to this
685 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
686 // require a critical section, where we predict that they will be implemented
687 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
688 //
689 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
690 // the FLAG parameter should always be 1. If we know that we will be using
691 // a critical section, then we want to make certain that we use the generic
692 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
693 // locks that are specialized based upon the size or type of the data.
694 //
695 // If FLAG is 0, then we are relying on dead code elimination by the build
696 // compiler to get rid of the useless block of code, and save a needless
697 // branch at runtime.
698 //
699 
700 #ifdef KMP_GOMP_COMPAT
701 # define OP_GOMP_CRITICAL(OP,FLAG) \
702  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
703  KMP_CHECK_GTID; \
704  OP_CRITICAL( OP, 0 ); \
705  return; \
706  }
707 # else
708 # define OP_GOMP_CRITICAL(OP,FLAG)
709 #endif /* KMP_GOMP_COMPAT */
710 
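When KMP_GOMP_COMPAT is defined and FLAG is 1, the guard above routes the update through the single generic lock (LCK_ID 0 selects __kmp_atomic_lock) and returns early. Roughly, OP_GOMP_CRITICAL(+=, 1) expands to the following (a sketch):

    // Approximate expansion of OP_GOMP_CRITICAL(+=, 1) with KMP_GOMP_COMPAT defined:
    if ( (1) && (__kmp_atomic_mode == 2) ) {
        if ( gtid == KMP_GTID_UNKNOWN ) {
            gtid = __kmp_entry_gtid();
        }
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
        (*lhs) += (rhs);
        __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
        return;
    }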
711 #if KMP_MIC
712 # define KMP_DO_PAUSE _mm_delay_32( 30 )
713 #else
714 # define KMP_DO_PAUSE KMP_CPU_PAUSE()
715 #endif /* KMP_MIC */
716 
717 // ------------------------------------------------------------------------
718 // Operation on *lhs, rhs using "compare_and_store" routine
719 // TYPE - operands' type
720 // BITS - size in bits, used to distinguish low level calls
721 // OP - operator
722 // Note: temp_val introduced in order to force the compiler to read
723 // *lhs only once (w/o it the compiler reads *lhs twice)
724 #define OP_CMPXCHG(TYPE,BITS,OP) \
725  { \
726  TYPE KMP_ATOMIC_VOLATILE temp_val; \
727  TYPE old_value, new_value; \
728  temp_val = *lhs; \
729  old_value = temp_val; \
730  new_value = old_value OP rhs; \
731  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
732  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
733  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
734  { \
735  KMP_DO_PAUSE; \
736  \
737  temp_val = *lhs; \
738  old_value = temp_val; \
739  new_value = old_value OP rhs; \
740  } \
741  }
742 
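To make the retry loop concrete, here is approximately what OP_CMPXCHG(kmp_real64, 64, *) expands to for an 8-byte multiply (a sketch with the VOLATILE_CAST casts simplified to plain casts):

    // Approximate expansion of OP_CMPXCHG(kmp_real64, 64, *), casts simplified:
    {
        kmp_real64 volatile temp_val;
        kmp_real64 old_value, new_value;
        temp_val  = *lhs;                       // read the target exactly once
        old_value = temp_val;
        new_value = old_value * rhs;
        // retry while another thread changed *lhs between the read and the CAS
        while ( ! KMP_COMPARE_AND_STORE_ACQ64( (kmp_int64 *) lhs,
                                               *(kmp_int64 *) &old_value,
                                               *(kmp_int64 *) &new_value ) ) {
            KMP_DO_PAUSE;
            temp_val  = *lhs;
            old_value = temp_val;
            new_value = old_value * rhs;
        }
    }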
743 #if USE_CMPXCHG_FIX
744 // 2007-06-25:
745 // workaround for C78287 (complex(kind=4) data type)
746 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
747 // Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
748 // This is a problem of the compiler.
749 // Related tracker is C76005, targeted to 11.0.
750 // I verified the asm of the workaround.
751 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
752  { \
753  char anonym[ ( sizeof( TYPE ) == sizeof( kmp_int##BITS ) ) ? ( 1 ) : ( 0 ) ] = { 1 }; \
754  struct _sss { \
755  TYPE cmp; \
756  kmp_int##BITS *vvv; \
757  }; \
758  struct _sss old_value, new_value; \
759  old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
760  new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
761  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
762  new_value.cmp = old_value.cmp OP rhs; \
763  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
764  *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
765  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
766  { \
767  KMP_DO_PAUSE; \
768  \
769  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
770  new_value.cmp = old_value.cmp OP rhs; \
771  } \
772  }
773 // end of the first part of the workaround for C78287
774 #endif // USE_CMPXCHG_FIX
775 
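A side note on the workaround above: the anonym array doubles as a compile-time size check. Its length is 1 when sizeof(TYPE) matches the chosen integer width and 0 otherwise, and the = { 1 } initializer then has too many elements, so a mismatched instantiation fails to compile. The same idiom in isolation (a sketch, not library code):

    /* Sketch of the compile-time size-check idiom used by anonym[] above:
       a zero-length array plus the { 1 } initializer makes the build fail
       when sizeof(kmp_cmplx32) != sizeof(kmp_int64). */
    char size_check[ sizeof( kmp_cmplx32 ) == sizeof( kmp_int64 ) ? 1 : 0 ] = { 1 };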
776 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
777 
778 // ------------------------------------------------------------------------
779 // X86 or X86_64: no alignment problems ====================================
780 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
781 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
782  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
783  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
784  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
785 }
786 // -------------------------------------------------------------------------
787 #define ATOMIC_FLOAT_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
788 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
789  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
790  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
791  KMP_TEST_THEN_ADD_REAL##BITS( lhs, OP rhs ); \
792 }
793 // -------------------------------------------------------------------------
794 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
795 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
796  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
797  OP_CMPXCHG(TYPE,BITS,OP) \
798 }
799 #if USE_CMPXCHG_FIX
800 // -------------------------------------------------------------------------
801 // workaround for C78287 (complex(kind=4) data type)
802 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
803 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
804  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
805  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
806 }
807 // end of the second part of the workaround for C78287
808 #endif
809 
810 #else
811 // -------------------------------------------------------------------------
812 // Code for other architectures that don't handle unaligned accesses.
813 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
814 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
815  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
816  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
817  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
818  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
819  } else { \
820  KMP_CHECK_GTID; \
821  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
822  } \
823 }
824 // -------------------------------------------------------------------------
825 #define ATOMIC_FLOAT_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
826 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
827  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
828  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
829  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
830  } else { \
831  KMP_CHECK_GTID; \
832  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
833  } \
834 }
835 // -------------------------------------------------------------------------
836 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
837 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
838  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
839  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
840  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
841  } else { \
842  KMP_CHECK_GTID; \
843  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
844  } \
845 }
846 #if USE_CMPXCHG_FIX
847 // -------------------------------------------------------------------------
848 // workaround for C78287 (complex(kind=4) data type)
849 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
850 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
851  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
852  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
853  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
854  } else { \
855  KMP_CHECK_GTID; \
856  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
857  } \
858 }
859 // end of the second part of the workaround for C78287
860 #endif // USE_CMPXCHG_FIX
861 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
862 
863 // Routines for ATOMIC 4-byte operands addition and subtraction
864 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
865 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
866 
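Putting the macros together, the generated __kmpc_atomic_fixed4_add on x86/x86_64 is essentially the following (a sketch; the KA_TRACE call and the GOMP-compat branch, which is dead code here because GOMP_FLAG is 0, are elided):

    // Approximate shape of __kmpc_atomic_fixed4_add on x86 / x86_64:
    void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid,
                                   kmp_int32 *lhs, kmp_int32 rhs )
    {
        KMP_DEBUG_ASSERT( __kmp_init_serial );
        KMP_TEST_THEN_ADD32( lhs, + rhs );   // single locked add, no CAS loop needed
    }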
867 #if KMP_MIC
868 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
869 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
870 #else
871 ATOMIC_FLOAT_ADD( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
872 ATOMIC_FLOAT_ADD( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
873 #endif // KMP_MIC
874 
875 // Routines for ATOMIC 8-byte operands addition and subtraction
876 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
877 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
878 
879 #if KMP_MIC
880 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
881 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
882 #else
883 ATOMIC_FLOAT_ADD( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
884 ATOMIC_FLOAT_ADD( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
885 #endif // KMP_MIC
886 
887 // ------------------------------------------------------------------------
888 // Entry definitions for integer operands
889 // TYPE_ID - operands type and size (fixed4, float4)
890 // OP_ID - operation identifier (add, sub, mul, ...)
891 // TYPE - operand type
892 // BITS - size in bits, used to distinguish low level calls
893 // OP - operator (used in critical section)
894 // LCK_ID - lock identifier, used to possibly distinguish lock variable
895 // MASK - used for alignment check
896 
897 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
898 // ------------------------------------------------------------------------
899 // Routines for ATOMIC integer operands, other operators
900 // ------------------------------------------------------------------------
901 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
902 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
903 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
904 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
905 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
906 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
907 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
908 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
909 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
910 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
911 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
912 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
913 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
914 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
915 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
916 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
917 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
918 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
919 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
920 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
921 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
922 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
923 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
924 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
925 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
926 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
927 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
928 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
929 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
930 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
931 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
932 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
933 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
934 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
935 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
936 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
937 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
938 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
939 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
940 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
941 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
942 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
943 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
944 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
945 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
946 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
947 
948 
949 /* ------------------------------------------------------------------------ */
950 /* Routines for C/C++ Reduction operators && and || */
951 /* ------------------------------------------------------------------------ */
952 
953 // ------------------------------------------------------------------------
954 // Need separate macros for &&, || because there is no combined assignment
955 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
956 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
957 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
958  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
959  OP_CRITICAL( = *lhs OP, LCK_ID ) \
960 }
961 
962 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
963 
964 // ------------------------------------------------------------------------
965 // X86 or X86_64: no alignment problems ===================================
966 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
967 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
968  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
969  OP_CMPXCHG(TYPE,BITS,OP) \
970 }
971 
972 #else
973 // ------------------------------------------------------------------------
974 // Code for other architectures that don't handle unaligned accesses.
975 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
976 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
977  OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
978  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
979  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
980  } else { \
981  KMP_CHECK_GTID; \
982  OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
983  } \
984 }
985 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
986 
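In the GOMP-compat and critical-section fallbacks above, the operator is passed together with a leading "= *lhs", so OP_CRITICAL's update line "(*lhs) OP (rhs);" becomes the full logical assignment; the lock-free path uses the bare operator inside OP_CMPXCHG. A sketch of the textual expansion for &&:

    // "(*lhs) OP (rhs);" with OP passed as "= *lhs &&" expands to:
    (*lhs) = *lhs && (rhs);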
987 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
988 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
989 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
990 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
991 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
992 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
993 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
994 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
995 
996 
997 /* ------------------------------------------------------------------------- */
998 /* Routines for Fortran operators that have no C counterpart: */
999 /* MAX, MIN, .EQV., .NEQV. */
1000 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1001 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1002 /* ------------------------------------------------------------------------- */
1003 
1004 // -------------------------------------------------------------------------
1005 // MIN and MAX need separate macros
1006 // OP - operator used to check whether any action is needed
1007 #define MIN_MAX_CRITSECT(OP,LCK_ID) \
1008  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1009  \
1010  if ( *lhs OP rhs ) { /* still need actions? */ \
1011  *lhs = rhs; \
1012  } \
1013  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1014 
1015 // -------------------------------------------------------------------------
1016 #ifdef KMP_GOMP_COMPAT
1017 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
1018  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
1019  KMP_CHECK_GTID; \
1020  MIN_MAX_CRITSECT( OP, 0 ); \
1021  return; \
1022  }
1023 #else
1024 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
1025 #endif /* KMP_GOMP_COMPAT */
1026 
1027 // -------------------------------------------------------------------------
1028 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1029  { \
1030  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1031  TYPE old_value; \
1032  temp_val = *lhs; \
1033  old_value = temp_val; \
1034  while ( old_value OP rhs && /* still need actions? */ \
1035  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1036  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1037  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
1038  { \
1039  KMP_CPU_PAUSE(); \
1040  temp_val = *lhs; \
1041  old_value = temp_val; \
1042  } \
1043  }
1044 
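The loop above gives up as soon as the stored value already satisfies the comparison, so an atomic max, for instance, only writes when it would actually raise the value. Approximately, MIN_MAX_CMPXCHG(kmp_int32, 32, <) as used by __kmpc_atomic_fixed4_max expands to (casts simplified):

    // Approximate expansion of MIN_MAX_CMPXCHG(kmp_int32, 32, <), casts simplified:
    {
        kmp_int32 volatile temp_val;
        kmp_int32 old_value;
        temp_val  = *lhs;
        old_value = temp_val;
        // keep trying while the stored value is still smaller than rhs
        // and another thread won the race to update it
        while ( old_value < rhs &&
                ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
                                               *(kmp_int32 *) &old_value,
                                               *(kmp_int32 *) &rhs ) ) {
            KMP_CPU_PAUSE();
            temp_val  = *lhs;
            old_value = temp_val;
        }
    }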
1045 // -------------------------------------------------------------------------
1046 // 1-byte, 2-byte operands - use critical section
1047 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1048 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1049  if ( *lhs OP rhs ) { /* need actions? */ \
1050  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1051  MIN_MAX_CRITSECT(OP,LCK_ID) \
1052  } \
1053 }
1054 
1055 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1056 
1057 // -------------------------------------------------------------------------
1058 // X86 or X86_64: no alignment problems ====================================
1059 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1060 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1061  if ( *lhs OP rhs ) { \
1062  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1063  MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1064  } \
1065 }
1066 
1067 #else
1068 // -------------------------------------------------------------------------
1069 // Code for other architectures that don't handle unaligned accesses.
1070 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1071 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1072  if ( *lhs OP rhs ) { \
1073  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1074  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1075  MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1076  } else { \
1077  KMP_CHECK_GTID; \
1078  MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1079  } \
1080  } \
1081 }
1082 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1083 
1084 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1085 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1086 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1087 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1088 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1089 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1090 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1091 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1092 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1093 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1094 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1095 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1096 #if KMP_HAVE_QUAD
1097 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1098 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1099 #if ( KMP_ARCH_X86 )
1100  MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1101  MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1102 #endif
1103 #endif
1104 // ------------------------------------------------------------------------
1105 // Need separate macros for .EQV. because a complement (~) is needed
1106 // OP ignored for critical sections, ^=~ used instead
1107 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1108 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1109  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1110  OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1111 }
1112 
1113 // ------------------------------------------------------------------------
1114 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1115 // ------------------------------------------------------------------------
1116 // X86 or X86_64: no alignment problems ===================================
1117 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1118 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1119  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1120  OP_CMPXCHG(TYPE,BITS,OP) \
1121 }
1122 // ------------------------------------------------------------------------
1123 #else
1124 // ------------------------------------------------------------------------
1125 // Code for other architectures that don't handle unaligned accesses.
1126 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1127 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1128  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1129  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1130  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1131  } else { \
1132  KMP_CHECK_GTID; \
1133  OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1134  } \
1135 }
1136 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1137 
1138 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1139 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1140 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1141 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1142 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1143 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1144 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1145 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1146 
1147 
1148 // ------------------------------------------------------------------------
1149 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1150 // TYPE_ID, OP_ID, TYPE - detailed above
1151 // OP - operator
1152 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1153 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1154 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1155  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1156  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1157 }
1158 
1159 /* ------------------------------------------------------------------------- */
1160 // routines for long double type
1161 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1162 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1163 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1164 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
1165 #if KMP_HAVE_QUAD
1166 // routines for _Quad type
1167 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1168 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1169 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1170 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1171 #if ( KMP_ARCH_X86 )
1172  ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1173  ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1174  ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1175  ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1176 #endif
1177 #endif
1178 // routines for complex types
1179 
1180 #if USE_CMPXCHG_FIX
1181 // workaround for C78287 (complex(kind=4) data type)
1182 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1183 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1184 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1185 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1186 // end of the workaround for C78287
1187 #else
1188 ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
1189 ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
1190 ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
1191 ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
1192 #endif // USE_CMPXCHG_FIX
1193 
1194 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1195 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1196 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1197 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1198 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1199 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1200 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1201 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1202 #if KMP_HAVE_QUAD
1203 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1204 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1205 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1206 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1207 #if ( KMP_ARCH_X86 )
1208  ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1209  ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1210  ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1211  ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1212 #endif
1213 #endif
1214 
1215 #if OMP_40_ENABLED
1216 
1217 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1218 // Supported only on IA-32 architecture and Intel(R) 64
1219 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1220 
1221 // ------------------------------------------------------------------------
1222 // Operation on *lhs, rhs bound by critical section
1223 // OP - operator (it's supposed to contain an assignment)
1224 // LCK_ID - lock identifier
1225 // Note: don't check gtid as it should always be valid
1226 // 1, 2-byte - expect valid parameter, other - check before this macro
1227 #define OP_CRITICAL_REV(OP,LCK_ID) \
1228  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1229  \
1230  (*lhs) = (rhs) OP (*lhs); \
1231  \
1232  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1233 
1234 #ifdef KMP_GOMP_COMPAT
1235 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1236  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1237  KMP_CHECK_GTID; \
1238  OP_CRITICAL_REV( OP, 0 ); \
1239  return; \
1240  }
1241 #else
1242 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1243 #endif /* KMP_GOMP_COMPAT */
1244 
1245 
1246 // Beginning of a definition (provides name, parameters, debug trace)
1247 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
1248 // OP_ID - operation identifier (add, sub, mul, ...)
1249 // TYPE - operands' type
1250 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1251 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1252 { \
1253  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1254  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1255 
1256 // ------------------------------------------------------------------------
1257 // Operation on *lhs, rhs using "compare_and_store" routine
1258 // TYPE - operands' type
1259 // BITS - size in bits, used to distinguish low level calls
1260 // OP - operator
1261 // Note: temp_val introduced in order to force the compiler to read
1262 // *lhs only once (w/o it the compiler reads *lhs twice)
1263 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1264  { \
1265  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1266  TYPE old_value, new_value; \
1267  temp_val = *lhs; \
1268  old_value = temp_val; \
1269  new_value = rhs OP old_value; \
1270  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1271  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1272  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1273  { \
1274  KMP_DO_PAUSE; \
1275  \
1276  temp_val = *lhs; \
1277  old_value = temp_val; \
1278  new_value = rhs OP old_value; \
1279  } \
1280  }
1281 
1282 // -------------------------------------------------------------------------
1283 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1284 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1285  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1286  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1287 }
1288 
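The only difference from the forward path is that the new value is computed as rhs OP old_value, which is what OpenMP 4.0's x = expr binop x requires. As a caller-side sketch (loc_descriptor, gtid and x are the same illustrative names as above, not taken from this file), an atomic x = 10 / x on a 32-bit int would be lowered to the fixed4 div entry instantiated below:

    // Hypothetical lowering of "x = 10 / x" under "#pragma omp atomic" (sketch only):
    __kmpc_atomic_fixed4_div_rev( &loc_descriptor, gtid, &x, 10 );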
1289 // ------------------------------------------------------------------------
1290 // Entry definitions for integer operands
1291 // TYPE_ID - operands type and size (fixed4, float4)
1292 // OP_ID - operation identifier (add, sub, mul, ...)
1293 // TYPE - operand type
1294 // BITS - size in bits, used to distinguish low level calls
1295 // OP - operator (used in critical section)
1296 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1297 
1298 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1299 // ------------------------------------------------------------------------
1300 // Routines for ATOMIC integer operands, other operators
1301 // ------------------------------------------------------------------------
1302 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1303 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1304 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1305 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1306 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1307 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1308 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1309 
1310 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1311 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1312 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1313 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1314 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1315 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1316 
1317 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1318 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1319 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1320 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1321 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1322 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1323 
1324 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1325 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1326 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1327 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1328 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1329 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1330 
1331 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1332 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1333 
1334 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1335 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1336 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1337 
1338 // ------------------------------------------------------------------------
1339 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1340 // TYPE_ID, OP_ID, TYPE - detailed above
1341 // OP - operator
1342 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1343 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1344 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1345  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1346  OP_CRITICAL_REV(OP,LCK_ID) \
1347 }
1348 
1349 /* ------------------------------------------------------------------------- */
1350 // routines for long double type
1351 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1352 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1353 #if KMP_HAVE_QUAD
1354 // routines for _Quad type
1355 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1356 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1357 #if ( KMP_ARCH_X86 )
1358  ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1359  ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1360 #endif
1361 #endif
1362 
1363 // routines for complex types
1364 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1365 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1366 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1367 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1368 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1369 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1370 #if KMP_HAVE_QUAD
1371 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
1372 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1373 #if ( KMP_ARCH_X86 )
1374  ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1375  ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1376 #endif
1377 #endif
1378 
1379 
1380 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1381 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1382 
1383 #endif //OMP_40_ENABLED
1384 
1385 
1386 /* ------------------------------------------------------------------------ */
1387 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1388 /* Note: in order to reduce the total number of type combinations, */
1389 /* it is assumed that the compiler converts RHS to the longest floating type, */
1390 /* that is _Quad, before calling any of these routines. */
1391 /* Conversion to _Quad is done by the compiler during the calculation, */
1392 /* and conversion back to TYPE happens before the assignment, like: */
1393 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1394 /* A performance penalty is expected because of the SW emulation. */
1395 /* ------------------------------------------------------------------------ */
1396 
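As a caller-side sketch of the scheme described above (an assumed lowering, not taken from this file): for an int LHS and a double RHS the compiler can call the float8 variant directly, while for other floating RHS types it first converts the RHS to _Quad and calls the corresponding _fp entry.

    // Hypothetical lowering (sketch only):
    //     int x;  double d;
    //     #pragma omp atomic
    //     x *= d;
    __kmpc_atomic_fixed4_mul_float8( &loc_descriptor, gtid, &x, d );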
1397 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1398 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1399 { \
1400  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1401  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1402 
1403 // -------------------------------------------------------------------------
1404 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1405 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1406  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1407  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1408 }
1409 
1410 // -------------------------------------------------------------------------
1411 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412 // -------------------------------------------------------------------------
1413 // X86 or X86_64: no alignment problems ====================================
1414 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1415 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1416  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1417  OP_CMPXCHG(TYPE,BITS,OP) \
1418 }
1419 // -------------------------------------------------------------------------
1420 #else
1421 // ------------------------------------------------------------------------
1422 // Code for other architectures that don't handle unaligned accesses.
1423 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1424 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1425  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1426  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1427  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1428  } else { \
1429  KMP_CHECK_GTID; \
1430  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1431  } \
1432 }
1433 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1434 
1435 // RHS=float8
1436 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1437 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1438 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1439 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1440 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1441 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1442 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1443 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1444 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1445 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1446 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1447 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1448 
1449 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1450 #if KMP_HAVE_QUAD
1451 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1452 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1453 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1454 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1455 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1456 
1457 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1458 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1459 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1460 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1461 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1462 
1463 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1464 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1465 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1466 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1467 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1468 
1469 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1470 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1471 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1472 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1473 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1474 
1475 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1476 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1477 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1478 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1479 
1480 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1481 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1482 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1483 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1484 
1485 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1486 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1487 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1488 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
1489 #endif
1490 
1491 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1492 // ------------------------------------------------------------------------
1493 // X86 or X86_64: no alignment problems ====================================
1494 #if USE_CMPXCHG_FIX
1495 // workaround for C78287 (complex(kind=4) data type)
1496 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1497 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1498  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1499  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1500 }
1501 // end of the second part of the workaround for C78287
1502 #else
1503 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1504 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1505  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1506  OP_CMPXCHG(TYPE,BITS,OP) \
1507 }
1508 #endif // USE_CMPXCHG_FIX
1509 #else
1510 // ------------------------------------------------------------------------
1511 // Code for other architectures that don't handle unaligned accesses.
1512 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1513 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1514  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1515  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1516  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1517  } else { \
1518  KMP_CHECK_GTID; \
1519  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1520  } \
1521 }
1522 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1523 
1524 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1525 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1526 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1527 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
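// ------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime): roughly the update that
// ATOMIC_CMPXCHG_CMPLX produces for __kmpc_atomic_cmplx4_add_cmplx8, i.e. an
// 8-byte float _Complex location combined with a double _Complex operand and
// stored back by compare-and-swapping its whole 64-bit image.  The GCC builtin
// __sync_bool_compare_and_swap stands in for KMP_COMPARE_AND_STORE_ACQ64 (an
// assumption for this sketch), and the function name is hypothetical.
#include <complex.h>
#include <stdint.h>
#include <string.h>

static void cmplx4_add_cmplx8_sketch( float _Complex *lhs, double _Complex rhs )
{
    float _Complex old_value, new_value;
    int64_t old_bits, new_bits;
    do {
        old_value = *lhs;                 // snapshot the target
        new_value = old_value + rhs;      // computed in double _Complex, narrowed on assignment
        memcpy( &old_bits, &old_value, sizeof old_bits );
        memcpy( &new_bits, &new_value, sizeof new_bits );
    } while ( !__sync_bool_compare_and_swap( (volatile int64_t *) lhs, old_bits, new_bits ) );
}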
1528 
1529 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1530 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1531 
1533 // ------------------------------------------------------------------------
1534 // Atomic READ routines
1535 // ------------------------------------------------------------------------
1536 
1537 // ------------------------------------------------------------------------
1538 // Beginning of a definition (provides name, parameters, debug trace)
1539 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1540 // OP_ID - operation identifier (add, sub, mul, ...)
1541 // TYPE - operands' type
1542 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1543 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1544 { \
1545  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1546  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1547 
1548 // ------------------------------------------------------------------------
1549 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1550 // TYPE - operands' type
1551 // BITS - size in bits, used to distinguish low level calls
1552 // OP - operator
1553 // Note: temp_val introduced in order to force the compiler to read
1554 // *lhs only once (w/o it the compiler reads *lhs twice)
1555 // TODO: check if it is still necessary
1556 // Return old value regardless of the result of the "compare & swap" operation
1557 
1558 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1559  { \
1560  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1561  union f_i_union { \
1562  TYPE f_val; \
1563  kmp_int##BITS i_val; \
1564  }; \
1565  union f_i_union old_value; \
1566  temp_val = *loc; \
1567  old_value.f_val = temp_val; \
1568  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1569  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1570  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1571  new_value = old_value.f_val; \
1572  return new_value; \
1573  }
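// ------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime): what OP_CMPXCHG_READ boils
// down to for a 4-byte float, i.e. __kmpc_atomic_float4_rd.  A compare-and-swap
// whose "expected" and "new" arguments are the same snapshot either rewrites
// identical bits or fails; either way its return value is an atomic copy of *loc.
// The GCC builtin __sync_val_compare_and_swap stands in for
// KMP_COMPARE_AND_STORE_RET32 (an assumption); the function name is hypothetical.
#include <stdint.h>

static float atomic_float4_rd_sketch( volatile float *loc )
{
    union { float f_val; int32_t i_val; } old_value;
    old_value.f_val = *loc;                                  // read once into a temporary
    old_value.i_val = __sync_val_compare_and_swap( (volatile int32_t *) loc,
                                                   old_value.i_val,
                                                   old_value.i_val );
    return old_value.f_val;
}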
1574 
1575 // -------------------------------------------------------------------------
1576 // Operation on *lhs, rhs bound by critical section
1577 // OP - operator (it's supposed to contain an assignment)
1578 // LCK_ID - lock identifier
1579 // Note: don't check gtid as it should always be valid
1580 // 1- and 2-byte operands: a valid gtid is expected; other sizes: check it before this macro
1581 #define OP_CRITICAL_READ(OP,LCK_ID) \
1582  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1583  \
1584  new_value = (*loc); \
1585  \
1586  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1587 
1588 // -------------------------------------------------------------------------
1589 #ifdef KMP_GOMP_COMPAT
1590 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1591  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1592  KMP_CHECK_GTID; \
1593  OP_CRITICAL_READ( OP, 0 ); \
1594  return new_value; \
1595  }
1596 #else
1597 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1598 #endif /* KMP_GOMP_COMPAT */
1599 
1600 // -------------------------------------------------------------------------
1601 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1602 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1603  TYPE new_value; \
1604  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1605  new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1606  return new_value; \
1607 }
1608 // -------------------------------------------------------------------------
1609 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1610 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1611  TYPE new_value; \
1612  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1613  OP_CMPXCHG_READ(TYPE,BITS,OP) \
1614 }
1615 // ------------------------------------------------------------------------
1616 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1617 // TYPE_ID, OP_ID, TYPE - detailed above
1618 // OP - operator
1619 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1620 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1621 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1622  TYPE new_value; \
1623  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1624  OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1625  return new_value; \
1626 }
1627 
1628 // ------------------------------------------------------------------------
1629 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1630 // Let's return the read value through the additional parameter.
1631 
1632 #if ( KMP_OS_WINDOWS )
1633 
1634 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1635  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1636  \
1637  (*out) = (*loc); \
1638  \
1639  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1640 // ------------------------------------------------------------------------
1641 #ifdef KMP_GOMP_COMPAT
1642 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1643  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1644  KMP_CHECK_GTID; \
1645  OP_CRITICAL_READ_WRK( OP, 0 ); \
1646  }
1647 #else
1648 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1649 #endif /* KMP_GOMP_COMPAT */
1650 // ------------------------------------------------------------------------
1651 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1652 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1653 { \
1654  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1655  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1656 
1657 // ------------------------------------------------------------------------
1658 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1659 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1660  OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1661  OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1662 }
1663 
1664 #endif // KMP_OS_WINDOWS
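// ------------------------------------------------------------------------
// Note (editorial sketch, not part of the runtime): ATOMIC_BEGIN_READ_WRK above
// gives the Windows* OS cmplx4 read a different shape from the regular
// ATOMIC_BEGIN_READ form, so callers must match whichever variant was built:
//
//   Windows* OS: void        __kmpc_atomic_cmplx4_rd( kmp_cmplx32 *out, ident_t *id_ref, int gtid, kmp_cmplx32 *loc );
//   elsewhere:   kmp_cmplx32 __kmpc_atomic_cmplx4_rd( ident_t *id_ref, int gtid, kmp_cmplx32 *loc );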
1665 
1666 // ------------------------------------------------------------------------
1667 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1668 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1669 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1670 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1671 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1672 
1673 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1674 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1675 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1676 
1677 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1678 #if KMP_HAVE_QUAD
1679 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1680 #endif // KMP_HAVE_QUAD
1681 
1682 // Fix for CQ220361 on Windows* OS
1683 #if ( KMP_OS_WINDOWS )
1684  ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1685 #else
1686  ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1687 #endif
1688 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1689 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1690 #if KMP_HAVE_QUAD
1691 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1692 #if ( KMP_ARCH_X86 )
1693  ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1694  ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1695 #endif
1696 #endif
1697 
1698 
1699 // ------------------------------------------------------------------------
1700 // Atomic WRITE routines
1701 // ------------------------------------------------------------------------
1702 
1703 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1704 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1705  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1706  KMP_XCHG_FIXED##BITS( lhs, rhs ); \
1707 }
1708 // ------------------------------------------------------------------------
1709 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1710 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1711  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1712  KMP_XCHG_REAL##BITS( lhs, rhs ); \
1713 }
1714 
1715 
1716 // ------------------------------------------------------------------------
1717 // Operation on *lhs, rhs using "compare_and_store" routine
1718 // TYPE - operands' type
1719 // BITS - size in bits, used to distinguish low level calls
1720 // OP - operator
1721 // Note: temp_val introduced in order to force the compiler to read
1722 // *lhs only once (w/o it the compiler reads *lhs twice)
1723 #define OP_CMPXCHG_WR(TYPE,BITS,OP) \
1724  { \
1725  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1726  TYPE old_value, new_value; \
1727  temp_val = *lhs; \
1728  old_value = temp_val; \
1729  new_value = rhs; \
1730  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1731  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1732  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1733  { \
1734  KMP_CPU_PAUSE(); \
1735  \
1736  temp_val = *lhs; \
1737  old_value = temp_val; \
1738  new_value = rhs; \
1739  } \
1740  }
1741 
1742 // -------------------------------------------------------------------------
1743 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1744 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1745  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1746  OP_CMPXCHG_WR(TYPE,BITS,OP) \
1747 }
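// ------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime): the retry loop that
// OP_CMPXCHG_WR generates, shown for an 8-byte double (the path taken by
// __kmpc_atomic_float8_wr on IA-32, where a plain 8-byte store is not atomic).
// The GCC builtin __sync_bool_compare_and_swap stands in for
// KMP_COMPARE_AND_STORE_ACQ64 (an assumption); the function name is hypothetical.
#include <stdint.h>
#include <string.h>

static void atomic_float8_wr_sketch( volatile double *lhs, double rhs )
{
    double old_value;
    int64_t old_bits, new_bits;
    memcpy( &new_bits, &rhs, sizeof new_bits );               // bit image of the value to store
    do {
        old_value = *lhs;                                     // re-read the target each attempt
        memcpy( &old_bits, &old_value, sizeof old_bits );
    } while ( !__sync_bool_compare_and_swap( (volatile int64_t *) lhs, old_bits, new_bits ) );
}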
1748 
1749 // ------------------------------------------------------------------------
1750 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1751 // TYPE_ID, OP_ID, TYPE - detailed above
1752 // OP - operator
1753 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1754 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1755 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1756  OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1757  OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1758 }
1759 // -------------------------------------------------------------------------
1760 
1761 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1762 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1763 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1764 #if ( KMP_ARCH_X86 )
1765  ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1766 #else
1767  ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1768 #endif
1769 
1770 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1771 #if ( KMP_ARCH_X86 )
1772  ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1773 #else
1774  ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1775 #endif
1776 
1777 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1778 #if KMP_HAVE_QUAD
1779 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1780 #endif
1781 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1782 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1783 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1784 #if KMP_HAVE_QUAD
1785 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1786 #if ( KMP_ARCH_X86 )
1787  ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1788  ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1789 #endif
1790 #endif
1791 
1792 
1793 // ------------------------------------------------------------------------
1794 // Atomic CAPTURE routines
1795 // ------------------------------------------------------------------------
1796 
1797 // Beginning of a definition (provides name, parameters, debug trace)
1798 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1799 // OP_ID - operation identifier (add, sub, mul, ...)
1800 // TYPE - operands' type
1801 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
1802 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1803 { \
1804  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1805  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1806 
1807 // -------------------------------------------------------------------------
1808 // Operation on *lhs, rhs bound by critical section
1809 // OP - operator (it's supposed to contain an assignment)
1810 // LCK_ID - lock identifier
1811 // Note: don't check gtid as it should always be valid
1812 // 1- and 2-byte operands: a valid gtid is expected; other sizes: check it before this macro
1813 #define OP_CRITICAL_CPT(OP,LCK_ID) \
1814  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1815  \
1816  if( flag ) { \
1817  (*lhs) OP rhs; \
1818  new_value = (*lhs); \
1819  } else { \
1820  new_value = (*lhs); \
1821  (*lhs) OP rhs; \
1822  } \
1823  \
1824  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1825  return new_value;
1826 
1827 // ------------------------------------------------------------------------
1828 #ifdef KMP_GOMP_COMPAT
1829 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
1830  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1831  KMP_CHECK_GTID; \
1832  OP_CRITICAL_CPT( OP##=, 0 ); \
1833  }
1834 #else
1835 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1836 #endif /* KMP_GOMP_COMPAT */
1837 
1838 // ------------------------------------------------------------------------
1839 // Operation on *lhs, rhs using "compare_and_store" routine
1840 // TYPE - operands' type
1841 // BITS - size in bits, used to distinguish low level calls
1842 // OP - operator
1843 // Note: temp_val introduced in order to force the compiler to read
1844 // *lhs only once (w/o it the compiler reads *lhs twice)
1845 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1846  { \
1847  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1848  TYPE old_value, new_value; \
1849  temp_val = *lhs; \
1850  old_value = temp_val; \
1851  new_value = old_value OP rhs; \
1852  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1853  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1854  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1855  { \
1856  KMP_CPU_PAUSE(); \
1857  \
1858  temp_val = *lhs; \
1859  old_value = temp_val; \
1860  new_value = old_value OP rhs; \
1861  } \
1862  if( flag ) { \
1863  return new_value; \
1864  } else \
1865  return old_value; \
1866  }
1867 
1868 // -------------------------------------------------------------------------
1869 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1870 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1871  TYPE new_value; \
1872  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1873  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1874 }
1875 
1876 // -------------------------------------------------------------------------
1877 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1878 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1879  TYPE old_value, new_value; \
1880  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1881  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1882  old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
1883  if( flag ) { \
1884  return old_value OP rhs; \
1885  } else \
1886  return old_value; \
1887 }
1888 // -------------------------------------------------------------------------
1889 #define ATOMIC_FLOAT_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1890 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1891  TYPE old_value, new_value; \
1892  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1893  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1894  old_value = KMP_TEST_THEN_ADD_REAL##BITS( lhs, OP rhs ); \
1895  if( flag ) { \
1896  return old_value OP rhs; \
1897  } else \
1898  return old_value; \
1899 }
1900 // -------------------------------------------------------------------------
1901 
1902 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1903 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1904 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1905 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1906 
1907 #if KMP_MIC
1908 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1909 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1910 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1911 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1912 #else
1913 ATOMIC_FLOAT_ADD_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1914 ATOMIC_FLOAT_ADD_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1915 ATOMIC_FLOAT_ADD_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1916 ATOMIC_FLOAT_ADD_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1917 #endif // KMP_MIC
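// ------------------------------------------------------------------------
// Illustrative usage sketch (not part of the runtime): a capture construct such
// as
//     #pragma omp atomic capture
//     { v = x; x = x + expr; }
// can be lowered to the fixed4 entry point generated above, whose shape follows
// ATOMIC_BEGIN_CPT:
//     kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid,
//                                             kmp_int32 *lhs, kmp_int32 rhs, int flag );
// The 'flag' argument selects the captured value: 1 returns the value after the
// update, 0 the value before it.  The helper below is hypothetical; id_ref and
// gtid are supplied by the compiler in real lowerings.
static kmp_int32 capture_then_add_sketch( ident_t *id_ref, int gtid, kmp_int32 *x, kmp_int32 expr )
{
    return __kmpc_atomic_fixed4_add_cpt( id_ref, gtid, x, expr, 0 /* capture pre-update value */ );
}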
1918 
1919 // ------------------------------------------------------------------------
1920 // Entry definitions for integer operands
1921 // TYPE_ID - operands type and size (fixed4, float4)
1922 // OP_ID - operation identifier (add, sub, mul, ...)
1923 // TYPE - operand type
1924 // BITS - size in bits, used to distinguish low level calls
1925 // OP - operator (used in critical section)
1926 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1927 // ------------------------------------------------------------------------
1928 // Routines for ATOMIC integer operands, other operators
1929 // ------------------------------------------------------------------------
1930 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1931 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1932 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1933 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1934 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1935 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1936 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1937 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1938 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1939 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1940 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1941 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1942 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1943 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1944 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1945 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1946 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1947 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1948 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1949 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1950 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1951 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1952 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1953 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1954 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1955 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1956 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1957 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1958 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1959 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1960 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1961 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1962 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1963 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1964 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1965 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1966 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1967 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1968 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1969 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1970 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1971 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1972 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1973 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1974 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1975 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1976 
1977 // ------------------------------------------------------------------------
1978 // Routines for C/C++ Reduction operators && and ||
1979 // ------------------------------------------------------------------------
1980 
1981 // -------------------------------------------------------------------------
1982 // Operation on *lhs, rhs bound by critical section
1983 // OP - operator (it's supposed to contain an assignment)
1984 // LCK_ID - lock identifier
1985 // Note: don't check gtid as it should always be valid
1986 // 1- and 2-byte operands: a valid gtid is expected; other sizes: check it before this macro
1987 #define OP_CRITICAL_L_CPT(OP,LCK_ID) \
1988  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1989  \
1990  if( flag ) { \
1991  new_value OP rhs; \
1992  } else \
1993  new_value = (*lhs); \
1994  \
1995  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1996 
1997 // ------------------------------------------------------------------------
1998 #ifdef KMP_GOMP_COMPAT
1999 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
2000  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2001  KMP_CHECK_GTID; \
2002  OP_CRITICAL_L_CPT( OP, 0 ); \
2003  return new_value; \
2004  }
2005 #else
2006 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
2007 #endif /* KMP_GOMP_COMPAT */
2008 
2009 // ------------------------------------------------------------------------
2010 // Need separate macros for &&, || because there is no combined assignment
2011 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2012 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2013  TYPE new_value; \
2014  OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
2015  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2016 }
2017 
2018 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
2019 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
2020 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
2021 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
2022 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
2023 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
2024 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
2025 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
2026 
2027 
2028 // -------------------------------------------------------------------------
2029 // Routines for Fortran operators that matched no one in C:
2030 // MAX, MIN, .EQV., .NEQV.
2031 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2032 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2033 // -------------------------------------------------------------------------
2034 
2035 // -------------------------------------------------------------------------
2036 // MIN and MAX need separate macros
2037 // OP - comparison operator used to check whether any action is still needed
2038 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2039  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2040  \
2041  if ( *lhs OP rhs ) { /* still need actions? */ \
2042  old_value = *lhs; \
2043  *lhs = rhs; \
2044  if ( flag ) \
2045  new_value = rhs; \
2046  else \
2047  new_value = old_value; \
2048  } \
2049  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2050  return new_value; \
2051 
2052 // -------------------------------------------------------------------------
2053 #ifdef KMP_GOMP_COMPAT
2054 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
2055  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
2056  KMP_CHECK_GTID; \
2057  MIN_MAX_CRITSECT_CPT( OP, 0 ); \
2058  }
2059 #else
2060 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
2061 #endif /* KMP_GOMP_COMPAT */
2062 
2063 // -------------------------------------------------------------------------
2064 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2065  { \
2066  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2067  /*TYPE old_value; */ \
2068  temp_val = *lhs; \
2069  old_value = temp_val; \
2070  while ( old_value OP rhs && /* still need actions? */ \
2071  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2072  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2073  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2074  { \
2075  KMP_CPU_PAUSE(); \
2076  temp_val = *lhs; \
2077  old_value = temp_val; \
2078  } \
2079  if( flag ) \
2080  return rhs; \
2081  else \
2082  return old_value; \
2083  }
2084 
2085 // -------------------------------------------------------------------------
2086 // 1-byte, 2-byte operands - use critical section
2087 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2088 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2089  TYPE new_value, old_value; \
2090  if ( *lhs OP rhs ) { /* need actions? */ \
2091  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2092  MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2093  } \
2094  return *lhs; \
2095 }
2096 
2097 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2098 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2099  TYPE new_value, old_value; \
2100  if ( *lhs OP rhs ) { \
2101  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2102  MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2103  } \
2104  return *lhs; \
2105 }
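// ------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime): the shape of the code that
// MIN_MAX_COMPXCHG_CPT + MIN_MAX_CMPXCHG_CPT produce for a 4-byte signed "max"
// capture.  The store is attempted only while *lhs OP rhs still holds (OP is
// '<' for max), so a target that is already large enough is left untouched.
// The GCC builtin __sync_bool_compare_and_swap stands in for
// KMP_COMPARE_AND_STORE_ACQ32 (an assumption); the function name is hypothetical.
#include <stdint.h>

static int32_t atomic_fixed4_max_cpt_sketch( volatile int32_t *lhs, int32_t rhs, int flag )
{
    if ( *lhs < rhs ) {                               // any action needed at all?
        int32_t old_value = *lhs;
        while ( old_value < rhs &&                    // still need to update?
                !__sync_bool_compare_and_swap( lhs, old_value, rhs ) )
        {
            old_value = *lhs;                         // lost a race: re-read and retry
        }
        return flag ? rhs : old_value;                // captured value after/before the update
    }
    return *lhs;                                      // already the max: nothing stored
}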
2106 
2107 
2108 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2109 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2110 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2111 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2112 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2113 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2114 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2115 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2116 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2117 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2118 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2119 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2120 #if KMP_HAVE_QUAD
2121 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2122 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2123 #if ( KMP_ARCH_X86 )
2124  MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2125  MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2126 #endif
2127 #endif
2128 
2129 // ------------------------------------------------------------------------
2130 #ifdef KMP_GOMP_COMPAT
2131 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
2132  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2133  KMP_CHECK_GTID; \
2134  OP_CRITICAL_CPT( OP, 0 ); \
2135  }
2136 #else
2137 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2138 #endif /* KMP_GOMP_COMPAT */
2139 // ------------------------------------------------------------------------
2140 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2141 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2142  TYPE new_value; \
2143  OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
2144  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2145 }
2146 
2147 // ------------------------------------------------------------------------
2148 
2149 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2150 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2151 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2152 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2153 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2154 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2155 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2156 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
2157 
2158 // ------------------------------------------------------------------------
2159 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2160 // TYPE_ID, OP_ID, TYPE - detailed above
2161 // OP - operator
2162 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2163 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2164 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2165  TYPE new_value; \
2166  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2167  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2168 }
2169 
2170 // ------------------------------------------------------------------------
2171 
2172 // Workaround for cmplx4. Regular routines with return value don't work
2173 // on Win_32e. Let's return captured values through the additional parameter.
2174 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
2175  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2176  \
2177  if( flag ) { \
2178  (*lhs) OP rhs; \
2179  (*out) = (*lhs); \
2180  } else { \
2181  (*out) = (*lhs); \
2182  (*lhs) OP rhs; \
2183  } \
2184  \
2185  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2186  return;
2187 // ------------------------------------------------------------------------
2188 
2189 #ifdef KMP_GOMP_COMPAT
2190 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
2191  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2192  KMP_CHECK_GTID; \
2193  OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
2194  }
2195 #else
2196 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2197 #endif /* KMP_GOMP_COMPAT */
2198 // ------------------------------------------------------------------------
2199 
2200 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2201 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2202 { \
2203  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2204  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2205 // ------------------------------------------------------------------------
2206 
2207 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2208 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2209  OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
2210  OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
2211 }
2212 // The end of workaround for cmplx4
2213 
2214 /* ------------------------------------------------------------------------- */
2215 // routines for long double type
2216 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2217 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2218 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2219 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2220 #if KMP_HAVE_QUAD
2221 // routines for _Quad type
2222 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2223 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2224 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2225 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2226 #if ( KMP_ARCH_X86 )
2227  ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2228  ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2229  ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2230  ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2231 #endif
2232 #endif
2233 
2234 // routines for complex types
2235 
2236 // cmplx4 routines to return void
2237 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2238 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2239 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2240 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2241 
2242 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2243 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2244 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2245 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2246 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2247 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2248 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2249 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2250 #if KMP_HAVE_QUAD
2251 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2252 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2253 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2254 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2255 #if ( KMP_ARCH_X86 )
2256  ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2257  ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2258  ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2259  ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2260 #endif
2261 #endif
2262 
2263 #if OMP_40_ENABLED
2264 
2265 // OpenMP 4.0: reversed-operand capture forms v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } (separate routines are needed since binop may be non-commutative).
2266 // Supported only on IA-32 architecture and Intel(R) 64
2267 
2268 // -------------------------------------------------------------------------
2269 // Operation on *lhs, rhs bound by critical section
2270 // OP - operator (it's supposed to contain an assignment)
2271 // LCK_ID - lock identifier
2272 // Note: don't check gtid as it should always be valid
2273 // 1- and 2-byte operands: a valid gtid is expected; other sizes: check it before this macro
2274 #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2275  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2276  \
2277  if( flag ) { \
2278  /*temp_val = (*lhs);*/\
2279  (*lhs) = (rhs) OP (*lhs); \
2280  new_value = (*lhs); \
2281  } else { \
2282  new_value = (*lhs);\
2283  (*lhs) = (rhs) OP (*lhs); \
2284  } \
2285  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2286  return new_value;
2287 
2288 // ------------------------------------------------------------------------
2289 #ifdef KMP_GOMP_COMPAT
2290 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
2291  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2292  KMP_CHECK_GTID; \
2293  OP_CRITICAL_CPT_REV( OP, 0 ); \
2294  }
2295 #else
2296 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
2297 #endif /* KMP_GOMP_COMPAT */
2298 
2299 // ------------------------------------------------------------------------
2300 // Operation on *lhs, rhs using "compare_and_store" routine
2301 // TYPE - operands' type
2302 // BITS - size in bits, used to distinguish low level calls
2303 // OP - operator
2304 // Note: temp_val introduced in order to force the compiler to read
2305 // *lhs only once (w/o it the compiler reads *lhs twice)
2306 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2307  { \
2308  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2309  TYPE old_value, new_value; \
2310  temp_val = *lhs; \
2311  old_value = temp_val; \
2312  new_value = rhs OP old_value; \
2313  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2314  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2315  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2316  { \
2317  KMP_CPU_PAUSE(); \
2318  \
2319  temp_val = *lhs; \
2320  old_value = temp_val; \
2321  new_value = rhs OP old_value; \
2322  } \
2323  if( flag ) { \
2324  return new_value; \
2325  } else \
2326  return old_value; \
2327  }
2328 
2329 // -------------------------------------------------------------------------
2330 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2331 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2332  TYPE new_value; \
2333  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2334  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2335  OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2336 }
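// ------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime): the reversed-operand retry loop
// that OP_CMPXCHG_CPT_REV generates, shown for the 4-byte integer form
// { v = x; x = expr - x; } (__kmpc_atomic_fixed4_sub_cpt_rev).  The only
// difference from the plain _cpt loop is that the new value is computed as
// rhs OP old rather than old OP rhs.  __sync_bool_compare_and_swap stands in
// for KMP_COMPARE_AND_STORE_ACQ32 (an assumption); the function name is hypothetical.
#include <stdint.h>

static int32_t atomic_fixed4_sub_cpt_rev_sketch( volatile int32_t *lhs, int32_t rhs, int flag )
{
    int32_t old_value, new_value;
    do {
        old_value = *lhs;
        new_value = rhs - old_value;                  // operands reversed: expr - x
    } while ( !__sync_bool_compare_and_swap( lhs, old_value, new_value ) );
    return flag ? new_value : old_value;              // value after/before the update
}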
2337 
2338 
2339 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
2340 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
2341 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
2342 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
2343 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
2344 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
2345 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
2346 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
2347 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
2348 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
2349 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
2350 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
2351 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
2352 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
2353 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
2354 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
2355 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
2356 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
2357 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
2358 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
2359 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
2360 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
2361 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
2362 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
2363 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
2364 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
2365 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
2366 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
2367 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2368 
2369 
2370 // ------------------------------------------------------------------------
2371 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2372 // TYPE_ID, OP_ID, TYPE - detailed above
2373 // OP - operator
2374 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2375 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2376 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2377  TYPE new_value; \
2378  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2379  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
2380  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2381  OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2382 }
2383 
2384 
2385 /* ------------------------------------------------------------------------- */
2386 // routines for long double type
2387 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
2388 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
2389 #if KMP_HAVE_QUAD
2390 // routines for _Quad type
2391 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
2392 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
2393 #if ( KMP_ARCH_X86 )
2394  ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
2395  ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
2396 #endif
2397 #endif
2398 
2399 // routines for complex types
2400 
2401 // ------------------------------------------------------------------------
2402 
2403 // Workaround for cmplx4. Regular routines with return value don't work
2404 // on Win_32e. Let's return captured values through the additional parameter.
2405 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2406  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2407  \
2408  if( flag ) { \
2409  (*lhs) = (rhs) OP (*lhs); \
2410  (*out) = (*lhs); \
2411  } else { \
2412  (*out) = (*lhs); \
2413  (*lhs) = (rhs) OP (*lhs); \
2414  } \
2415  \
2416  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2417  return;
2418 // ------------------------------------------------------------------------
2419 
2420 #ifdef KMP_GOMP_COMPAT
2421 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
2422  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2423  KMP_CHECK_GTID; \
2424  OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
2425  }
2426 #else
2427 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
2428 #endif /* KMP_GOMP_COMPAT */
2429 // ------------------------------------------------------------------------
2430 
2431 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2432 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2433  OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
2434  OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2435 }
2436 // The end of workaround for cmplx4
2437 
2438 
2439 // !!! TODO: check if we need to return void for cmplx4 routines
2440 // cmplx4 routines to return void
2441 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
2442 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
2443 
2444 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
2445 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
2446 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
2447 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
2448 #if KMP_HAVE_QUAD
2449 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
2450 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
2451 #if ( KMP_ARCH_X86 )
2452  ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2453  ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2454 #endif
2455 #endif
2456 
2457 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
2458 
2459 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2460 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
2461 { \
2462  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2463  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2464 
2465 #define CRITICAL_SWP(LCK_ID) \
2466  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2467  \
2468  old_value = (*lhs); \
2469  (*lhs) = rhs; \
2470  \
2471  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2472  return old_value;
2473 
2474 // ------------------------------------------------------------------------
2475 #ifdef KMP_GOMP_COMPAT
2476 #define GOMP_CRITICAL_SWP(FLAG) \
2477  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2478  KMP_CHECK_GTID; \
2479  CRITICAL_SWP( 0 ); \
2480  }
2481 #else
2482 #define GOMP_CRITICAL_SWP(FLAG)
2483 #endif /* KMP_GOMP_COMPAT */
2484 
2485 
2486 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2487 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2488  TYPE old_value; \
2489  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2490  old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
2491  return old_value; \
2492 }
2493 // ------------------------------------------------------------------------
2494 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2495 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2496  TYPE old_value; \
2497  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2498  old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
2499  return old_value; \
2500 }
2501 
2502 // ------------------------------------------------------------------------
2503 #define CMPXCHG_SWP(TYPE,BITS) \
2504  { \
2505  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2506  TYPE old_value, new_value; \
2507  temp_val = *lhs; \
2508  old_value = temp_val; \
2509  new_value = rhs; \
2510  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2511  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2512  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2513  { \
2514  KMP_CPU_PAUSE(); \
2515  \
2516  temp_val = *lhs; \
2517  old_value = temp_val; \
2518  new_value = rhs; \
2519  } \
2520  return old_value; \
2521  }
2522 
2523 // -------------------------------------------------------------------------
2524 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2525 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2526  TYPE old_value; \
2527  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2528  CMPXCHG_SWP(TYPE,BITS) \
2529 }
2530 
2531 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
2532 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
2533 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
2534 
2535 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
2536 
2537 #if ( KMP_ARCH_X86 )
2538  ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2539  ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2540 #else
2541  ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2542  ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2543 #endif
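// ------------------------------------------------------------------------
// Illustrative usage sketch (not part of the runtime): the capture-write form
//     #pragma omp atomic capture
//     { v = x; x = expr; }
// on a 4-byte integer can be lowered to __kmpc_atomic_fixed4_swp (generated
// above), which installs the new value and returns the previous one.  The
// helper below is hypothetical; id_ref and gtid come from the compiler.
static kmp_int32 swap_capture_sketch( ident_t *id_ref, int gtid, kmp_int32 *x, kmp_int32 expr )
{
    return __kmpc_atomic_fixed4_swp( id_ref, gtid, x, expr );   // old value of x
}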
2544 
2545 // ------------------------------------------------------------------------
2546 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2547 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
2548 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2549  TYPE old_value; \
2550  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2551  CRITICAL_SWP(LCK_ID) \
2552 }
2553 
2554 // ------------------------------------------------------------------------
2555 
2556 // !!! TODO: check if we need to return void for cmplx4 routines
2557 // Workaround for cmplx4. Regular routines with return value don't work
2558 // on Win_32e. Let's return captured values through the additional parameter.
2559 
2560 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2561 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
2562 { \
2563  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2564  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2565 
2566 
2567 #define CRITICAL_SWP_WRK(LCK_ID) \
2568  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2569  \
2570  tmp = (*lhs); \
2571  (*lhs) = (rhs); \
2572  (*out) = tmp; \
2573  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2574  return;
2575 
2576 // ------------------------------------------------------------------------
2577 
2578 #ifdef KMP_GOMP_COMPAT
2579 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
2580  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2581  KMP_CHECK_GTID; \
2582  CRITICAL_SWP_WRK( 0 ); \
2583  }
2584 #else
2585 #define GOMP_CRITICAL_SWP_WRK(FLAG)
2586 #endif /* KMP_GOMP_COMPAT */
2587 // ------------------------------------------------------------------------
2588 
2589 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
2590 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2591  TYPE tmp; \
2592  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
2593  CRITICAL_SWP_WRK(LCK_ID) \
2594 }
2595 // The end of workaround for cmplx4
2596 
2597 
2598 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
2599 #if KMP_HAVE_QUAD
2600 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
2601 #endif
2602 // cmplx4 routine to return void
2603 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2604 
2605 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2606 
2607 
2608 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
2609 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
2610 #if KMP_HAVE_QUAD
2611 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
2612 #if ( KMP_ARCH_X86 )
2613  ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
2614  ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
2615 #endif
2616 #endif
2617 
2618 
2619 // End of OpenMP 4.0 Capture
2620 
2621 #endif //OMP_40_ENABLED
2622 
2623 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2624 
2625 
2626 #undef OP_CRITICAL
2627 
2628 /* ------------------------------------------------------------------------ */
2629 /* Generic atomic routines */
2630 /* ------------------------------------------------------------------------ */
2631 
2632 void
2633 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2634 {
2635  KMP_DEBUG_ASSERT( __kmp_init_serial );
2636 
2637  if (
2638 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2639  FALSE /* must use lock */
2640 #else
2641  TRUE
2642 #endif
2643  )
2644  {
2645  kmp_int8 old_value, new_value;
2646 
2647  old_value = *(kmp_int8 *) lhs;
2648  (*f)( &new_value, &old_value, rhs );
2649 
2650  /* TODO: Should this be acquire or release? */
2651  while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2652  *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2653  {
2654  KMP_CPU_PAUSE();
2655 
2656  old_value = *(kmp_int8 *) lhs;
2657  (*f)( &new_value, &old_value, rhs );
2658  }
2659 
2660  return;
2661  }
2662  else {
2663  //
2664  // All 1-byte data is of integer data type.
2665  //
2666 
2667 #ifdef KMP_GOMP_COMPAT
2668  if ( __kmp_atomic_mode == 2 ) {
2669  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2670  }
2671  else
2672 #endif /* KMP_GOMP_COMPAT */
2673  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2674 
2675  (*f)( lhs, lhs, rhs );
2676 
2677 #ifdef KMP_GOMP_COMPAT
2678  if ( __kmp_atomic_mode == 2 ) {
2679  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2680  }
2681  else
2682 #endif /* KMP_GOMP_COMPAT */
2683  __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2684  }
2685 }
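
// A minimal usage sketch (illustrative, not part of this file): the callback
// convention visible above is f( &new_value, &old_value, rhs ), i.e. the callback
// computes the new value from the captured old value and the right-hand side.
// A compiler translating "#pragma omp atomic" on "x |= y" for a 1-byte type could
// therefore emit a hypothetical helper and a call such as:
//
//   static void byte_or( void *out, void *old, void *rhs )
//   {
//       *(kmp_int8 *)out = (kmp_int8)( *(kmp_int8 *)old | *(kmp_int8 *)rhs );
//   }
//   ...
//   __kmpc_atomic_1( &loc, gtid, &x, &y, byte_or );   // x, y are kmp_int8 variables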
2686 
2687 void
2688 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2689 {
2690  if (
2691 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2692  FALSE /* must use lock */
2693 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2694  TRUE /* no alignment problems */
2695 #else
2696  ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
2697 #endif
2698  )
2699  {
2700  kmp_int16 old_value, new_value;
2701 
2702  old_value = *(kmp_int16 *) lhs;
2703  (*f)( &new_value, &old_value, rhs );
2704 
2705  /* TODO: Should this be acquire or release? */
2706  while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2707  *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2708  {
2709  KMP_CPU_PAUSE();
2710 
2711  old_value = *(kmp_int16 *) lhs;
2712  (*f)( &new_value, &old_value, rhs );
2713  }
2714 
2715  return;
2716  }
2717  else {
2718  //
2719  // All 2-byte data is of integer data type.
2720  //
2721 
2722 #ifdef KMP_GOMP_COMPAT
2723  if ( __kmp_atomic_mode == 2 ) {
2724  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2725  }
2726  else
2727 #endif /* KMP_GOMP_COMPAT */
2728  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2729 
2730  (*f)( lhs, lhs, rhs );
2731 
2732 #ifdef KMP_GOMP_COMPAT
2733  if ( __kmp_atomic_mode == 2 ) {
2734  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2735  }
2736  else
2737 #endif /* KMP_GOMP_COMPAT */
2738  __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2739  }
2740 }
2741 
2742 void
2743 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2744 {
2745  KMP_DEBUG_ASSERT( __kmp_init_serial );
2746 
2747  if (
2748  //
2749  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
2750  // Gomp compatibility is broken if this routine is called for floats.
2751  //
2752 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2753  TRUE /* no alignment problems */
2754 #else
2755  ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
2756 #endif
2757  )
2758  {
2759  kmp_int32 old_value, new_value;
2760 
2761  old_value = *(kmp_int32 *) lhs;
2762  (*f)( &new_value, &old_value, rhs );
2763 
2764  /* TODO: Should this be acquire or release? */
2765  while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2766  *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2767  {
2768  KMP_CPU_PAUSE();
2769 
2770  old_value = *(kmp_int32 *) lhs;
2771  (*f)( &new_value, &old_value, rhs );
2772  }
2773 
2774  return;
2775  }
2776  else {
2777  //
2778  // Use __kmp_atomic_lock_4i for all 4-byte data,
2779  // even if it isn't of integer data type.
2780  //
2781 
2782 #ifdef KMP_GOMP_COMPAT
2783  if ( __kmp_atomic_mode == 2 ) {
2784  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2785  }
2786  else
2787 #endif /* KMP_GOMP_COMPAT */
2788  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2789 
2790  (*f)( lhs, lhs, rhs );
2791 
2792 #ifdef KMP_GOMP_COMPAT
2793  if ( __kmp_atomic_mode == 2 ) {
2794  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2795  }
2796  else
2797 #endif /* KMP_GOMP_COMPAT */
2798  __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2799  }
2800 }
2801 
2802 void
2803 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2804 {
2805  KMP_DEBUG_ASSERT( __kmp_init_serial );
2806  if (
2807 
2808 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2809  FALSE /* must use lock */
2810 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2811  TRUE /* no alignment problems */
2812 #else
2813  ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
2814 #endif
2815  )
2816  {
2817  kmp_int64 old_value, new_value;
2818 
2819  old_value = *(kmp_int64 *) lhs;
2820  (*f)( &new_value, &old_value, rhs );
2821  /* TODO: Should this be acquire or release? */
2822  while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2823  *(kmp_int64 *) &old_value,
2824  *(kmp_int64 *) &new_value ) )
2825  {
2826  KMP_CPU_PAUSE();
2827 
2828  old_value = *(kmp_int64 *) lhs;
2829  (*f)( &new_value, &old_value, rhs );
2830  }
2831 
2832  return;
2833  } else {
2834  //
2835  // Use __kmp_atomic_lock_8i for all 8-byte data,
2836  // even if it isn't of integer data type.
2837  //
2838 
2839 #ifdef KMP_GOMP_COMPAT
2840  if ( __kmp_atomic_mode == 2 ) {
2841  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2842  }
2843  else
2844 #endif /* KMP_GOMP_COMPAT */
2845  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2846 
2847  (*f)( lhs, lhs, rhs );
2848 
2849 #ifdef KMP_GOMP_COMPAT
2850  if ( __kmp_atomic_mode == 2 ) {
2851  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2852  }
2853  else
2854 #endif /* KMP_GOMP_COMPAT */
2855  __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2856  }
2857 }
2858 
2859 void
2860 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2861 {
2862  KMP_DEBUG_ASSERT( __kmp_init_serial );
2863 
2864 #ifdef KMP_GOMP_COMPAT
2865  if ( __kmp_atomic_mode == 2 ) {
2866  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2867  }
2868  else
2869 #endif /* KMP_GOMP_COMPAT */
2870  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2871 
2872  (*f)( lhs, lhs, rhs );
2873 
2874 #ifdef KMP_GOMP_COMPAT
2875  if ( __kmp_atomic_mode == 2 ) {
2876  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2877  }
2878  else
2879 #endif /* KMP_GOMP_COMPAT */
2880  __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2881 }
2882 
2883 void
2884 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2885 {
2886  KMP_DEBUG_ASSERT( __kmp_init_serial );
2887 
2888 #ifdef KMP_GOMP_COMPAT
2889  if ( __kmp_atomic_mode == 2 ) {
2890  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2891  }
2892  else
2893 #endif /* KMP_GOMP_COMPAT */
2894  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2895 
2896  (*f)( lhs, lhs, rhs );
2897 
2898 #ifdef KMP_GOMP_COMPAT
2899  if ( __kmp_atomic_mode == 2 ) {
2900  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2901  }
2902  else
2903 #endif /* KMP_GOMP_COMPAT */
2904  __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2905 }
2906 
2907 void
2908 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2909 {
2910  KMP_DEBUG_ASSERT( __kmp_init_serial );
2911 
2912 #ifdef KMP_GOMP_COMPAT
2913  if ( __kmp_atomic_mode == 2 ) {
2914  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2915  }
2916  else
2917 #endif /* KMP_GOMP_COMPAT */
2918  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2919 
2920  (*f)( lhs, lhs, rhs );
2921 
2922 #ifdef KMP_GOMP_COMPAT
2923  if ( __kmp_atomic_mode == 2 ) {
2924  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2925  }
2926  else
2927 #endif /* KMP_GOMP_COMPAT */
2928  __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2929 }
2930 
2931 void
2932 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2933 {
2934  KMP_DEBUG_ASSERT( __kmp_init_serial );
2935 
2936 #ifdef KMP_GOMP_COMPAT
2937  if ( __kmp_atomic_mode == 2 ) {
2938  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2939  }
2940  else
2941 #endif /* KMP_GOMP_COMPAT */
2942  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2943 
2944  (*f)( lhs, lhs, rhs );
2945 
2946 #ifdef KMP_GOMP_COMPAT
2947  if ( __kmp_atomic_mode == 2 ) {
2948  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2949  }
2950  else
2951 #endif /* KMP_GOMP_COMPAT */
2952  __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2953 }
2954 
2955 // AC: the same two routines as GOMP_atomic_start/end, but called by our compiler;
2956 // duplicated so that third-party names are not used in pure Intel code.
2957 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
2958 void
2959 __kmpc_atomic_start(void)
2960 {
2961  int gtid = __kmp_entry_gtid();
2962  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
2963  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
2964 }
2965 
2966 
2967 void
2968 __kmpc_atomic_end(void)
2969 {
2970  int gtid = __kmp_get_gtid();
2971  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
2972  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
2973 }
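
// Usage sketch (illustrative, not part of this file): for an atomic update that the
// compiler cannot map onto one of the typed routines above, it can bracket the
// user-coded update with these entry points, serializing it on the global lock:
//
//   __kmpc_atomic_start();
//   x = some_update( x, y );   // arbitrary update, protected by __kmp_atomic_lock
//   __kmpc_atomic_end();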
2974 
2975 /* ------------------------------------------------------------------------ */
2976 /* ------------------------------------------------------------------------ */
2981 // end of file