Intel® OpenMP* Runtime Library
kmp_atomic.c
1 /*
2  * kmp_atomic.c -- ATOMIC implementation routines
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #include "kmp_atomic.h"
36 #include "kmp.h" // TRUE, asm routines prototypes
37 
38 typedef unsigned char uchar;
39 typedef unsigned short ushort;
40 
558 /*
559  * Global vars
560  */
561 
562 #ifndef KMP_GOMP_COMPAT
563 int __kmp_atomic_mode = 1; // Intel perf
564 #else
565 int __kmp_atomic_mode = 2; // GOMP compatibility
566 #endif /* KMP_GOMP_COMPAT */
567 
568 KMP_ALIGN(128)
569 
570 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
571 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
572 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
573 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
574 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
575 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
576 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
577 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for float complex (8-byte) data type */
578 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
579 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
580 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
581 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
582 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
583 
584 
585 /*
586  2007-03-02:
587  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
588  bug on *_32 and *_32e. This is just a temporary workaround for the problem.
589  It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
590  routines in assembler language.
591 */
592 #define KMP_ATOMIC_VOLATILE volatile
593 
594 #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
595 
596  static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
597  static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
598  static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
599  static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
600  static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
601  static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
602 
603  static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
604  static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
605  static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
606  static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
607  static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
608  static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
609 
610  static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
611  static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
612  static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
613  static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
614 
615  static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
616  static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
617  static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
618  static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
619 
620 #endif
621 
622 /* ------------------------------------------------------------------------ */
623 /* ATOMIC implementation routines */
624 /* one routine for each operation and operand type */
625 /* ------------------------------------------------------------------------ */
626 
627 // All routine declarations look like
628 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
629 // ------------------------------------------------------------------------
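// Illustrative lowering (an assumption added for clarity, not part of the
// original source): for a 32-bit signed integer x the compiler is expected to
// turn
//     #pragma omp atomic
//     x += y;
// into a call such as
//     __kmpc_atomic_fixed4_add( &loc, gtid, &x, y );
// where loc is an ident_t describing the source position and gtid is the
// caller's global thread id (KMP_GTID_UNKNOWN if it is not known yet).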
630 
631 #define KMP_CHECK_GTID \
632  if ( gtid == KMP_GTID_UNKNOWN ) { \
633  gtid = __kmp_entry_gtid(); \
634  } // check and get gtid when needed
635 
636 // Beginning of a definition (provides name, parameters, debug trace)
637 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
638 // OP_ID - operation identifier (add, sub, mul, ...)
639 // TYPE - operands' type
640 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
641 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
642 { \
643  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
644  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
645 
646 // ------------------------------------------------------------------------
647 // Lock variables used for critical sections for various size operands
648 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
649 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
650 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
651 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
652 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
653 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
654 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
655 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
656 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
657 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
658 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
659 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
660 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
661 
662 // ------------------------------------------------------------------------
663 // Operation on *lhs, rhs bound by critical section
664 // OP - operator (it's supposed to contain an assignment)
665 // LCK_ID - lock identifier
666 // Note: gtid is not checked here because it should already be valid
667 // 1- and 2-byte operands expect a valid gtid; other sizes must check it before this macro
668 #define OP_CRITICAL(OP,LCK_ID) \
669  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
670  \
671  (*lhs) OP (rhs); \
672  \
673  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
674 
675 // ------------------------------------------------------------------------
676 // For GNU compatibility, we may need to use a critical section,
677 // even though it is not required by the ISA.
678 //
679 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
680 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
681 // critical section. On Intel(R) 64, all atomic operations are done with fetch
682 // and add or compare and exchange. Therefore, the FLAG parameter to this
683 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
684 // require a critical section, where we predict that they will be implemented
685 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
686 //
687 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
688 // the FLAG parameter should always be 1. If we know that we will be using
689 // a critical section, then we want to make certain that we use the generic
690 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
691 // locks that are specialized based upon the size or type of the data.
692 //
693 // If FLAG is 0, then we are relying on dead code elimination by the build
694 // compiler to get rid of the useless block of code, and save a needless
695 // branch at runtime.
696 //
697 
698 #ifdef KMP_GOMP_COMPAT
699 # define OP_GOMP_CRITICAL(OP,FLAG) \
700  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
701  KMP_CHECK_GTID; \
702  OP_CRITICAL( OP, 0 ); \
703  return; \
704  }
705 # else
706 # define OP_GOMP_CRITICAL(OP,FLAG)
707 #endif /* KMP_GOMP_COMPAT */
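// Illustration of the FLAG mechanism described above: OP_GOMP_CRITICAL(+=, 0)
// expands (under KMP_GOMP_COMPAT) to "if ( (0) && (__kmp_atomic_mode == 2) ) { ... }",
// which the compiler eliminates as dead code; with FLAG == 1 (or KMP_ARCH_X86
// on IA-32) the branch survives and routes the update through the generic
// __kmp_atomic_lock via OP_CRITICAL( OP, 0 ) when GOMP compatibility is active.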
708 
709 #if KMP_MIC
710 # define KMP_DO_PAUSE _mm_delay_32( 1 )
711 #else
712 # define KMP_DO_PAUSE KMP_CPU_PAUSE()
713 #endif /* KMP_MIC */
714 
715 // ------------------------------------------------------------------------
716 // Operation on *lhs, rhs using "compare_and_store" routine
717 // TYPE - operands' type
718 // BITS - size in bits, used to distinguish low level calls
719 // OP - operator
720 #define OP_CMPXCHG(TYPE,BITS,OP) \
721  { \
722  TYPE old_value, new_value; \
723  old_value = *(TYPE volatile *)lhs; \
724  new_value = old_value OP rhs; \
725  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
726  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
727  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
728  { \
729  KMP_DO_PAUSE; \
730  \
731  old_value = *(TYPE volatile *)lhs; \
732  new_value = old_value OP rhs; \
733  } \
734  }
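// A minimal sketch of what OP_CMPXCHG(kmp_real32, 32, +) expands to (added for
// illustration; the block is not compiled and simply spells out the macro above):
#if 0
static void example_op_cmpxchg_float4_add( kmp_real32 *lhs, kmp_real32 rhs )
{
    kmp_real32 old_value, new_value;
    old_value = *(kmp_real32 volatile *)lhs;        // snapshot the target
    new_value = old_value + rhs;                    // compute the desired value
    // Retry until no other thread changed *lhs between the read and the CAS.
    while ( ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
                *VOLATILE_CAST(kmp_int32 *) &old_value,
                *VOLATILE_CAST(kmp_int32 *) &new_value ) )
    {
        KMP_DO_PAUSE;
        old_value = *(kmp_real32 volatile *)lhs;
        new_value = old_value + rhs;
    }
}
#endif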
735 
736 #if USE_CMPXCHG_FIX
737 // 2007-06-25:
738 // workaround for C78287 (complex(kind=4) data type)
739 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
740 // Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
741 // This is a problem of the compiler.
742 // Related tracker is C76005, targeted to 11.0.
743 // I verified the asm of the workaround.
744 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
745  { \
746  char anonym[ ( sizeof( TYPE ) == sizeof( kmp_int##BITS ) ) ? ( 1 ) : ( 0 ) ] = { 1 }; \
747  struct _sss { \
748  TYPE cmp; \
749  kmp_int##BITS *vvv; \
750  }; \
751  struct _sss old_value, new_value; \
752  old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
753  new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
754  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
755  new_value.cmp = old_value.cmp OP rhs; \
756  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
757  *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
758  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
759  { \
760  KMP_DO_PAUSE; \
761  \
762  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
763  new_value.cmp = old_value.cmp OP rhs; \
764  } \
765  }
766 // end of the first part of the workaround for C78287
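// Note on the workaround above (explanatory, added for clarity): the array
// "anonym" is a compile-time size check - if sizeof(TYPE) differs from
// sizeof(kmp_int##BITS) its length becomes 0 and the { 1 } initializer makes
// the build fail. Accessing the value only through the kmp_int##BITS pointer
// stored in the struct (old_value.vvv / new_value.vvv) forces real memory
// reads of *lhs, sidestepping the compiler bug that ignores the volatile
// qualifier in the plain OP_CMPXCHG macro.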
767 #endif // USE_CMPXCHG_FIX
768 
769 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
770 
771 // ------------------------------------------------------------------------
772 // X86 or X86_64: no alignment problems ====================================
773 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
774 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
775  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
776  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
777  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
778 }
779 // -------------------------------------------------------------------------
780 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
781 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
782  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
783  OP_CMPXCHG(TYPE,BITS,OP) \
784 }
785 #if USE_CMPXCHG_FIX
786 // -------------------------------------------------------------------------
787 // workaround for C78287 (complex(kind=4) data type)
788 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
789 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
790  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
791  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
792 }
793 // end of the second part of the workaround for C78287
794 #endif
795 
796 #else
797 // -------------------------------------------------------------------------
798 // Code for other architectures that don't handle unaligned accesses.
799 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
800 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
801  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
802  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
803  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
804  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
805  } else { \
806  KMP_CHECK_GTID; \
807  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
808  } \
809 }
810 // -------------------------------------------------------------------------
811 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
812 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
813  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
814  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
815  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
816  } else { \
817  KMP_CHECK_GTID; \
818  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
819  } \
820 }
821 #if USE_CMPXCHG_FIX
822 // -------------------------------------------------------------------------
823 // workaround for C78287 (complex(kind=4) data type)
824 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
825 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
826  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
827  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
828  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
829  } else { \
830  KMP_CHECK_GTID; \
831  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
832  } \
833 }
834 // end of the second part of the workaround for C78287
835 #endif // USE_CMPXCHG_FIX
836 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
837 
838 // Routines for ATOMIC 4-byte operands addition and subtraction
839 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
840 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
841 
842 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
843 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
844 
845 // Routines for ATOMIC 8-byte operands addition and subtraction
846 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
847 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
848 
849 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
850 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
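// Approximate expansion of ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 )
// on IA-32 / Intel(R) 64 (shown for illustration only, not compiled):
#if 0
void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid ));
    // OP_GOMP_CRITICAL(+=, 0) contributes nothing here (GOMP_FLAG == 0).
    KMP_TEST_THEN_ADD32( lhs, + rhs );   // single fetch-and-add, no retry loop needed
}
#endif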
851 
852 // ------------------------------------------------------------------------
853 // Entries definition for integer operands
854 // TYPE_ID - operands type and size (fixed4, float4)
855 // OP_ID - operation identifier (add, sub, mul, ...)
856 // TYPE - operand type
857 // BITS - size in bits, used to distinguish low level calls
858 // OP - operator (used in critical section)
859 // LCK_ID - lock identifier, used to possibly distinguish lock variable
860 // MASK - used for alignment check
861 
862 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
863 // ------------------------------------------------------------------------
864 // Routines for ATOMIC integer operands, other operators
865 // ------------------------------------------------------------------------
866 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
867 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
868 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
869 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
870 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
871 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
872 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
873 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
874 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
875 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
876 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
877 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
878 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
879 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
880 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
881 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
882 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
883 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
884 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
885 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
886 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
887 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
888 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
889 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
890 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
891 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
892 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
893 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
894 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
895 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
896 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
897 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
898 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
899 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
900 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
901 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
902 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
903 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
904 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
905 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
906 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
907 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
908 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
909 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
910 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
911 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
912 
913 
914 /* ------------------------------------------------------------------------ */
915 /* Routines for C/C++ Reduction operators && and || */
916 /* ------------------------------------------------------------------------ */
917 
918 // ------------------------------------------------------------------------
919 // Need separate macros for &&, || because there is no combined assignment
920 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
921 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
922 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
923  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
924  OP_CRITICAL( = *lhs OP, LCK_ID ) \
925 }
926 
927 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
928 
929 // ------------------------------------------------------------------------
930 // X86 or X86_64: no alignment problems ===================================
931 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
932 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
933  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
934  OP_CMPXCHG(TYPE,BITS,OP) \
935 }
936 
937 #else
938 // ------------------------------------------------------------------------
939 // Code for other architectures that don't handle unaligned accesses.
940 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
941 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
942  OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
943  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
944  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
945  } else { \
946  KMP_CHECK_GTID; \
947  OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
948  } \
949 }
950 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
951 
952 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
953 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
954 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
955 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
956 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
957 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
958 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
959 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
960 
961 
962 /* ------------------------------------------------------------------------- */
963 /* Routines for Fortran operators that have no C counterpart: */
964 /* MAX, MIN, .EQV., .NEQV. */
965 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
966 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
967 /* ------------------------------------------------------------------------- */
968 
969 // -------------------------------------------------------------------------
970 // MIN and MAX need separate macros
971 // OP - comparison operator used to check whether an update is still needed
972 #define MIN_MAX_CRITSECT(OP,LCK_ID) \
973  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
974  \
975  if ( *lhs OP rhs ) { /* still need actions? */ \
976  *lhs = rhs; \
977  } \
978  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
979 
980 // -------------------------------------------------------------------------
981 #ifdef KMP_GOMP_COMPAT
982 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
983  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
984  KMP_CHECK_GTID; \
985  MIN_MAX_CRITSECT( OP, 0 ); \
986  return; \
987  }
988 #else
989 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
990 #endif /* KMP_GOMP_COMPAT */
991 
992 // -------------------------------------------------------------------------
993 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
994  { \
995  TYPE KMP_ATOMIC_VOLATILE temp_val; \
996  TYPE old_value; \
997  temp_val = *lhs; \
998  old_value = temp_val; \
999  while ( old_value OP rhs && /* still need actions? */ \
1000  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1001  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1002  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
1003  { \
1004  KMP_CPU_PAUSE(); \
1005  temp_val = *lhs; \
1006  old_value = temp_val; \
1007  } \
1008  }
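// A minimal sketch of the max path generated by MIN_MAX_COMPXCHG for
// ( fixed4, max, kmp_int32, 32, < ) - illustration only, not compiled:
#if 0
static void example_fixed4_max( kmp_int32 *lhs, kmp_int32 rhs )
{
    if ( *lhs < rhs ) {   // quick check: is an update needed at all?
        kmp_int32 KMP_ATOMIC_VOLATILE temp_val;
        kmp_int32 old_value;
        temp_val = *lhs;
        old_value = temp_val;
        // Keep trying while rhs is still larger and the CAS keeps losing races.
        while ( old_value < rhs &&
                ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
                    *VOLATILE_CAST(kmp_int32 *) &old_value,
                    *VOLATILE_CAST(kmp_int32 *) &rhs ) )
        {
            KMP_CPU_PAUSE();
            temp_val = *lhs;
            old_value = temp_val;
        }
    }
}
#endif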
1009 
1010 // -------------------------------------------------------------------------
1011 // 1-byte, 2-byte operands - use critical section
1012 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1013 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1014  if ( *lhs OP rhs ) { /* need actions? */ \
1015  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1016  MIN_MAX_CRITSECT(OP,LCK_ID) \
1017  } \
1018 }
1019 
1020 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1021 
1022 // -------------------------------------------------------------------------
1023 // X86 or X86_64: no alignment problems ====================================
1024 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1025 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1026  if ( *lhs OP rhs ) { \
1027  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1028  MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1029  } \
1030 }
1031 
1032 #else
1033 // -------------------------------------------------------------------------
1034 // Code for other architectures that don't handle unaligned accesses.
1035 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1036 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1037  if ( *lhs OP rhs ) { \
1038  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1039  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1040  MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1041  } else { \
1042  KMP_CHECK_GTID; \
1043  MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1044  } \
1045  } \
1046 }
1047 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1048 
1049 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1050 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1051 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1052 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1053 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1054 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1055 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1056 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1057 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1058 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1059 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1060 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1061 #if KMP_HAVE_QUAD
1062 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1063 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1064 #if ( KMP_ARCH_X86 )
1065  MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1066  MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1067 #endif
1068 #endif
1069 // ------------------------------------------------------------------------
1070 // Need separate macros for .EQV. because of the need for the complement (~)
1071 // OP ignored for critical sections, ^=~ used instead
1072 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1073 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1074  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1075  OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1076 }
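// Why "^=~" implements .EQV. (note added for clarity): the bitwise equivalence
// of a and b is ~(a ^ b), and since a ^ ~b == ~(a ^ b), the combined assignment
// *lhs ^= ~rhs produces exactly that result; .NEQV. is plain XOR and is handled
// by the ordinary ^ entries below.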
1077 
1078 // ------------------------------------------------------------------------
1079 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1080 // ------------------------------------------------------------------------
1081 // X86 or X86_64: no alignment problems ===================================
1082 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1083 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1084  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1085  OP_CMPXCHG(TYPE,BITS,OP) \
1086 }
1087 // ------------------------------------------------------------------------
1088 #else
1089 // ------------------------------------------------------------------------
1090 // Code for other architectures that don't handle unaligned accesses.
1091 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1092 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1093  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1094  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1095  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1096  } else { \
1097  KMP_CHECK_GTID; \
1098  OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1099  } \
1100 }
1101 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1102 
1103 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1104 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1105 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1106 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1107 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1108 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1109 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1110 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1111 
1112 
1113 // ------------------------------------------------------------------------
1114 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1115 // TYPE_ID, OP_ID, TYPE - detailed above
1116 // OP - operator
1117 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1118 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1119 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1120  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1121  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1122 }
1123 
1124 /* ------------------------------------------------------------------------- */
1125 // routines for long double type
1126 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1127 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1128 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1129 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
1130 #if KMP_HAVE_QUAD
1131 // routines for _Quad type
1132 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1133 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1134 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1135 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1136 #if ( KMP_ARCH_X86 )
1137  ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1138  ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1139  ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1140  ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1141 #endif
1142 #endif
1143 // routines for complex types
1144 
1145 #if USE_CMPXCHG_FIX
1146 // workaround for C78287 (complex(kind=4) data type)
1147 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1148 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1149 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1150 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1151 // end of the workaround for C78287
1152 #else
1153 ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
1154 ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
1155 ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
1156 ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
1157 #endif // USE_CMPXCHG_FIX
1158 
1159 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1160 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1161 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1162 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1163 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1164 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1165 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1166 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1167 #if KMP_HAVE_QUAD
1168 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1169 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1170 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1171 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1172 #if ( KMP_ARCH_X86 )
1173  ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1174  ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1175  ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1176  ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1177 #endif
1178 #endif
1179 
1180 #if OMP_40_ENABLED
1181 
1182 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1183 // Supported only on IA-32 architecture and Intel(R) 64
1184 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1185 
1186 // ------------------------------------------------------------------------
1187 // Operation on *lhs, rhs bound by critical section
1188 // OP - operator (it's supposed to contain an assignment)
1189 // LCK_ID - lock identifier
1190 // Note: don't check gtid as it should always be valid
1191 // 1, 2-byte - expect valid parameter, other - check before this macro
1192 #define OP_CRITICAL_REV(OP,LCK_ID) \
1193  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1194  \
1195  (*lhs) = (rhs) OP (*lhs); \
1196  \
1197  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1198 
1199 #ifdef KMP_GOMP_COMPAT
1200 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1201  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1202  KMP_CHECK_GTID; \
1203  OP_CRITICAL_REV( OP, 0 ); \
1204  return; \
1205  }
1206 #else
1207 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1208 #endif /* KMP_GOMP_COMPAT */
1209 
1210 
1211 // Beginning of a definition (provides name, parameters, debug trace)
1212 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
1213 // OP_ID - operation identifier (add, sub, mul, ...)
1214 // TYPE - operands' type
1215 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1216 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1217 { \
1218  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1219  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1220 
1221 // ------------------------------------------------------------------------
1222 // Operation on *lhs, rhs using "compare_and_store" routine
1223 // TYPE - operands' type
1224 // BITS - size in bits, used to distinguish low level calls
1225 // OP - operator
1226 // Note: temp_val introduced in order to force the compiler to read
1227 // *lhs only once (w/o it the compiler reads *lhs twice)
1228 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1229  { \
1230  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1231  TYPE old_value, new_value; \
1232  temp_val = *lhs; \
1233  old_value = temp_val; \
1234  new_value = rhs OP old_value; \
1235  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1236  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1237  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1238  { \
1239  KMP_DO_PAUSE; \
1240  \
1241  temp_val = *lhs; \
1242  old_value = temp_val; \
1243  new_value = rhs OP old_value; \
1244  } \
1245  }
1246 
1247 // -------------------------------------------------------------------------
1248 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1249 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1250  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1251  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1252 }
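// Illustrative use of the reverse entries (an assumption added for clarity,
// not part of the original source): for the non-commutative form
//     #pragma omp atomic
//     x = expr / x;
// the compiler is expected to call, e.g. for double x,
//     __kmpc_atomic_float8_div_rev( &loc, gtid, &x, expr );
// which atomically performs *lhs = rhs / (*lhs) via OP_CMPXCHG_REV above.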
1253 
1254 // ------------------------------------------------------------------------
1255 // Entries definition for integer operands
1256 // TYPE_ID - operands type and size (fixed4, float4)
1257 // OP_ID - operation identifier (add, sub, mul, ...)
1258 // TYPE - operand type
1259 // BITS - size in bits, used to distinguish low level calls
1260 // OP - operator (used in critical section)
1261 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1262 
1263 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1264 // ------------------------------------------------------------------------
1265 // Routines for ATOMIC integer operands, other operators
1266 // ------------------------------------------------------------------------
1267 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1268 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1269 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1270 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1271 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1272 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1273 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1274 
1275 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1276 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1277 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1278 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1279 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1280 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1281 
1282 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1283 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1284 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1285 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1286 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1287 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1288 
1289 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1290 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1291 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1292 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1293 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1294 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1295 
1296 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1297 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1298 
1299 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1300 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1301 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1302 
1303 // ------------------------------------------------------------------------
1304 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1305 // TYPE_ID, OP_ID, TYPE - detailed above
1306 // OP - operator
1307 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1308 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1309 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1310  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1311  OP_CRITICAL_REV(OP,LCK_ID) \
1312 }
1313 
1314 /* ------------------------------------------------------------------------- */
1315 // routines for long double type
1316 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1317 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1318 #if KMP_HAVE_QUAD
1319 // routines for _Quad type
1320 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1321 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1322 #if ( KMP_ARCH_X86 )
1323  ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1324  ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1325 #endif
1326 #endif
1327 
1328 // routines for complex types
1329 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1330 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1331 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1332 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1333 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1334 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1335 #if KMP_HAVE_QUAD
1336 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
1337 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1338 #if ( KMP_ARCH_X86 )
1339  ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1340  ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1341 #endif
1342 #endif
1343 
1344 
1345 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1346 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1347 
1348 #endif //OMP_40_ENABLED
1349 
1350 
1351 /* ------------------------------------------------------------------------ */
1352 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1353 /* Note: in order to reduce the total number of type combinations, */
1354 /* it is assumed that the compiler converts the RHS to the longest floating type, */
1355 /* that is _Quad, before calling any of these routines. */
1356 /* The conversion to _Quad is done by the compiler during the calculation and */
1357 /* the conversion back to TYPE happens just before the assignment, like: */
1358 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1359 /* A performance penalty is expected because of the software (_Quad) emulation. */
1360 /* ------------------------------------------------------------------------ */
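/* Illustrative lowering (an assumption added for clarity, not part of the
   original source): for
       int i;  double d;
       #pragma omp atomic
       i *= d;
   the compiler is expected to call
       __kmpc_atomic_fixed4_mul_float8( &loc, gtid, &i, d );
   where the update inside is effectively *lhs = (kmp_int32)( (*lhs) * rhs ),
   with the arithmetic carried out in the wider RHS type. */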
1361 
1362 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1363 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1364 { \
1365  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1366  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1367 
1368 // -------------------------------------------------------------------------
1369 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1370 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1371  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1372  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1373 }
1374 
1375 // -------------------------------------------------------------------------
1376 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1377 // -------------------------------------------------------------------------
1378 // X86 or X86_64: no alignment problems ====================================
1379 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1380 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1381  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1382  OP_CMPXCHG(TYPE,BITS,OP) \
1383 }
1384 // -------------------------------------------------------------------------
1385 #else
1386 // ------------------------------------------------------------------------
1387 // Code for other architectures that don't handle unaligned accesses.
1388 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1389 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1390  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1391  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1392  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1393  } else { \
1394  KMP_CHECK_GTID; \
1395  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1396  } \
1397 }
1398 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1399 
1400 // RHS=float8
1401 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1402 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1403 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1404 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1405 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1406 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1407 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1408 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1409 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1410 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1411 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1412 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1413 
1414 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1415 #if KMP_HAVE_QUAD
1416 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1417 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1418 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1419 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1420 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1421 
1422 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1423 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1424 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1425 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1426 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1427 
1428 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1429 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1430 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1431 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1432 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1433 
1434 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1435 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1436 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1437 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1438 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1439 
1440 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1441 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1442 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1443 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1444 
1445 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1446 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1447 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1448 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1449 
1450 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1451 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1452 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1453 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
1454 #endif
1455 
1456 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1457 // ------------------------------------------------------------------------
1458 // X86 or X86_64: no alignment problems ====================================
1459 #if USE_CMPXCHG_FIX
1460 // workaround for C78287 (complex(kind=4) data type)
1461 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1462 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1463  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1464  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1465 }
1466 // end of the second part of the workaround for C78287
1467 #else
1468 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1469 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1470  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1471  OP_CMPXCHG(TYPE,BITS,OP) \
1472 }
1473 #endif // USE_CMPXCHG_FIX
1474 #else
1475 // ------------------------------------------------------------------------
1476 // Code for other architectures that don't handle unaligned accesses.
1477 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1478 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1479  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1480  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1481  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1482  } else { \
1483  KMP_CHECK_GTID; \
1484  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1485  } \
1486 }
1487 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1488 
1489 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1490 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1491 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1492 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1493 
1494 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1495 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1496 
1498 // ------------------------------------------------------------------------
1499 // Atomic READ routines
1500 // ------------------------------------------------------------------------
1501 
1502 // ------------------------------------------------------------------------
1503 // Beginning of a definition (provides name, parameters, debug trace)
1504 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
1505 // OP_ID - operation identifier (add, sub, mul, ...)
1506 // TYPE - operands' type
1507 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1508 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1509 { \
1510  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1511  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1512 
1513 // ------------------------------------------------------------------------
1514 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1515 // TYPE - operands' type
1516 // BITS - size in bits, used to distinguish low level calls
1517 // OP - operator
1518 // Note: temp_val introduced in order to force the compiler to read
1519 // *lhs only once (w/o it the compiler reads *lhs twice)
1520 // TODO: check if it is still necessary
1521 // Return old value regardless of the result of "compare & swap" operation
1522 
1523 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1524  { \
1525  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1526  union f_i_union { \
1527  TYPE f_val; \
1528  kmp_int##BITS i_val; \
1529  }; \
1530  union f_i_union old_value; \
1531  temp_val = *loc; \
1532  old_value.f_val = temp_val; \
1533  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1534  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1535  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1536  new_value = old_value.f_val; \
1537  return new_value; \
1538  }
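// ------------------------------------------------------------------------
// Illustrative sketch (not part of the library build): the idea behind
// OP_CMPXCHG_READ, written out for a 64-bit floating point operand with the
// GCC builtin __sync_val_compare_and_swap standing in for
// KMP_COMPARE_AND_STORE_RET64. The floating point object is reinterpreted as
// a same-size integer, and a compare-and-swap that returns the previous bits
// yields an atomic load even where a plain load of the type is not atomic.
#if 0
#include <stdint.h>
static double atomic_read_f64_sketch( double *loc )
{
    union { double f_val; int64_t i_val; } old_value;
    old_value.f_val = *loc;                          // initial guess of the bits
    old_value.i_val = __sync_val_compare_and_swap( (int64_t *) loc,
                                                   old_value.i_val,
                                                   old_value.i_val );
    return old_value.f_val;                          // the value observed in *loc
}
#endif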
1539 
1540 // -------------------------------------------------------------------------
1541 // Operation on *lhs, rhs bound by critical section
1542 // OP - operator (it's supposed to contain an assignment)
1543 // LCK_ID - lock identifier
1544 // Note: don't check gtid as it should always be valid
1545 // 1- and 2-byte operands - gtid is expected to be valid; other sizes - it is checked before this macro is used
1546 #define OP_CRITICAL_READ(OP,LCK_ID) \
1547  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1548  \
1549  new_value = (*loc); \
1550  \
1551  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1552 
1553 // -------------------------------------------------------------------------
1554 #ifdef KMP_GOMP_COMPAT
1555 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1556  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1557  KMP_CHECK_GTID; \
1558  OP_CRITICAL_READ( OP, 0 ); \
1559  return new_value; \
1560  }
1561 #else
1562 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1563 #endif /* KMP_GOMP_COMPAT */
1564 
1565 // -------------------------------------------------------------------------
1566 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1567 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1568  TYPE new_value; \
1569  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1570  new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1571  return new_value; \
1572 }
1573 // -------------------------------------------------------------------------
1574 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1575 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1576  TYPE new_value; \
1577  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1578  OP_CMPXCHG_READ(TYPE,BITS,OP) \
1579 }
1580 // ------------------------------------------------------------------------
1581 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1582 // TYPE_ID, OP_ID, TYPE - detailed above
1583 // OP - operator
1584 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1585 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1586 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1587  TYPE new_value; \
1588  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1589  OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1590  return new_value; \
1591 }
1592 
1593 // ------------------------------------------------------------------------
1594 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1595 // Let's return the read value through the additional parameter.
1596 
1597 #if ( KMP_OS_WINDOWS )
1598 
1599 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1600  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1601  \
1602  (*out) = (*loc); \
1603  \
1604  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1605 // ------------------------------------------------------------------------
1606 #ifdef KMP_GOMP_COMPAT
1607 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1608  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1609  KMP_CHECK_GTID; \
1610  OP_CRITICAL_READ_WRK( OP, 0 ); \
1611  }
1612 #else
1613 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1614 #endif /* KMP_GOMP_COMPAT */
1615 // ------------------------------------------------------------------------
1616 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1617 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1618 { \
1619  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1620  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1621 
1622 // ------------------------------------------------------------------------
1623 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1624 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1625  OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1626  OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1627 }
1628 
1629 #endif // KMP_OS_WINDOWS
1630 
1631 // ------------------------------------------------------------------------
1632 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1633 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1634 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1635 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1636 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1637 
1638 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1639 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1640 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1641 
1642 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1643 #if KMP_HAVE_QUAD
1644 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1645 #endif // KMP_HAVE_QUAD
1646 
1647 // Fix for CQ220361 on Windows* OS
1648 #if ( KMP_OS_WINDOWS )
1649  ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1650 #else
1651  ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1652 #endif
1653 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1654 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1655 #if KMP_HAVE_QUAD
1656 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1657 #if ( KMP_ARCH_X86 )
1658  ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1659  ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1660 #endif
1661 #endif
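// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not part of the library build): how a
// compiler might lower "#pragma omp atomic read" for an 8-byte floating
// point location onto the routine generated above. The loc/gtid arguments
// are assumed to come from the usual __kmpc bookkeeping; the names are
// illustrative only.
#if 0
static void atomic_read_lowering_sketch( ident_t *loc, int gtid,
                                         kmp_real64 *x, kmp_real64 *v )
{
    // v = x;  (atomic read)
    *v = __kmpc_atomic_float8_rd( loc, gtid, x );
}
#endif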
1662 
1663 
1664 // ------------------------------------------------------------------------
1665 // Atomic WRITE routines
1666 // ------------------------------------------------------------------------
1667 
1668 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1669 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1670  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1671  KMP_XCHG_FIXED##BITS( lhs, rhs ); \
1672 }
1673 // ------------------------------------------------------------------------
1674 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1675 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1676  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1677  KMP_XCHG_REAL##BITS( lhs, rhs ); \
1678 }
1679 
1680 
1681 // ------------------------------------------------------------------------
1682 // Operation on *lhs, rhs using "compare_and_store" routine
1683 // TYPE - operands' type
1684 // BITS - size in bits, used to distinguish low level calls
1685 // OP - operator
1686 // Note: temp_val introduced in order to force the compiler to read
1687 // *lhs only once (w/o it the compiler reads *lhs twice)
1688 #define OP_CMPXCHG_WR(TYPE,BITS,OP) \
1689  { \
1690  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1691  TYPE old_value, new_value; \
1692  temp_val = *lhs; \
1693  old_value = temp_val; \
1694  new_value = rhs; \
1695  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1696  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1697  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1698  { \
1699  KMP_CPU_PAUSE(); \
1700  \
1701  temp_val = *lhs; \
1702  old_value = temp_val; \
1703  new_value = rhs; \
1704  } \
1705  }
1706 
1707 // -------------------------------------------------------------------------
1708 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1709 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1710  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1711  OP_CMPXCHG_WR(TYPE,BITS,OP) \
1712 }
1713 
1714 // ------------------------------------------------------------------------
1715 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1716 // TYPE_ID, OP_ID, TYPE - detailed above
1717 // OP - operator
1718 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1719 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1720 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1721  OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1722  OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1723 }
1724 // -------------------------------------------------------------------------
1725 
1726 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1727 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1728 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1729 #if ( KMP_ARCH_X86 )
1730  ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1731 #else
1732  ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1733 #endif
1734 
1735 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1736 #if ( KMP_ARCH_X86 )
1737  ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1738 #else
1739  ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1740 #endif
1741 
1742 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1743 #if KMP_HAVE_QUAD
1744 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1745 #endif
1746 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1747 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1748 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1749 #if KMP_HAVE_QUAD
1750 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1751 #if ( KMP_ARCH_X86 )
1752  ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1753  ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1754 #endif
1755 #endif
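// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not part of the library build): how a
// compiler might lower "#pragma omp atomic write" for an 8-byte floating
// point location onto the routine generated above; loc/gtid handling is
// simplified and the names are illustrative only.
#if 0
static void atomic_write_lowering_sketch( ident_t *loc, int gtid,
                                          kmp_real64 *x, kmp_real64 expr )
{
    // x = expr;  (atomic write)
    __kmpc_atomic_float8_wr( loc, gtid, x, expr );
}
#endif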
1756 
1757 
1758 // ------------------------------------------------------------------------
1759 // Atomic CAPTURE routines
1760 // ------------------------------------------------------------------------
1761 
1762 // Beginning of a definition (provides name, parameters, debug trace)
1763 // TYPE_ID - operands type and size (fixed* for signed, fixed*u for unsigned fixed-size integers)
1764 // OP_ID - operation identifier (add, sub, mul, ...)
1765 // TYPE - operands' type
1766 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
1767 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1768 { \
1769  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1770  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1771 
1772 // -------------------------------------------------------------------------
1773 // Operation on *lhs, rhs bound by critical section
1774 // OP - operator (it's supposed to contain an assignment)
1775 // LCK_ID - lock identifier
1776 // Note: don't check gtid as it should always be valid
1777 // 1- and 2-byte operands - gtid is expected to be valid; other sizes - it is checked before this macro is used
1778 #define OP_CRITICAL_CPT(OP,LCK_ID) \
1779  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1780  \
1781  if( flag ) { \
1782  (*lhs) OP rhs; \
1783  new_value = (*lhs); \
1784  } else { \
1785  new_value = (*lhs); \
1786  (*lhs) OP rhs; \
1787  } \
1788  \
1789  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1790  return new_value;
1791 
1792 // ------------------------------------------------------------------------
1793 #ifdef KMP_GOMP_COMPAT
1794 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
1795  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1796  KMP_CHECK_GTID; \
1797  OP_CRITICAL_CPT( OP##=, 0 ); \
1798  }
1799 #else
1800 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1801 #endif /* KMP_GOMP_COMPAT */
1802 
1803 // ------------------------------------------------------------------------
1804 // Operation on *lhs, rhs using "compare_and_store" routine
1805 // TYPE - operands' type
1806 // BITS - size in bits, used to distinguish low level calls
1807 // OP - operator
1808 // Note: temp_val introduced in order to force the compiler to read
1809 // *lhs only once (w/o it the compiler reads *lhs twice)
1810 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1811  { \
1812  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1813  TYPE old_value, new_value; \
1814  temp_val = *lhs; \
1815  old_value = temp_val; \
1816  new_value = old_value OP rhs; \
1817  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1818  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1819  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1820  { \
1821  KMP_CPU_PAUSE(); \
1822  \
1823  temp_val = *lhs; \
1824  old_value = temp_val; \
1825  new_value = old_value OP rhs; \
1826  } \
1827  if( flag ) { \
1828  return new_value; \
1829  } else \
1830  return old_value; \
1831  }
1832 
1833 // -------------------------------------------------------------------------
1834 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1835 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1836  TYPE new_value; \
1837  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1838  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1839 }
1840 
1841 // -------------------------------------------------------------------------
1842 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1843 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1844  TYPE old_value, new_value; \
1845  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1846  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1847  old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
1848  if( flag ) { \
1849  return old_value OP rhs; \
1850  } else \
1851  return old_value; \
1852 }
1853 // -------------------------------------------------------------------------
1854 
1855 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1856 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1857 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1858 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1859 
1860 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1861 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1862 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1863 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
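// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not part of the library build) of the
// "flag" argument of the capture routines: flag != 0 returns the value of
// the location after the update, flag == 0 the value before it. The loc/gtid
// handling is simplified and the names are illustrative only.
#if 0
static void atomic_capture_lowering_sketch( ident_t *loc, int gtid, kmp_int32 *x,
                                            kmp_int32 expr, kmp_int32 *v_old,
                                            kmp_int32 *v_new )
{
    // { v = x; x += expr; }  -- capture the old value
    *v_old = __kmpc_atomic_fixed4_add_cpt( loc, gtid, x, expr, 0 );
    // { x += expr; v = x; }  -- capture the new value
    *v_new = __kmpc_atomic_fixed4_add_cpt( loc, gtid, x, expr, 1 );
}
#endif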
1864 
1865 // ------------------------------------------------------------------------
1866 // Entries definition for integer operands
1867 // TYPE_ID - operands type and size (fixed4, float4)
1868 // OP_ID - operation identifier (add, sub, mul, ...)
1869 // TYPE - operand type
1870 // BITS - size in bits, used to distinguish low level calls
1871 // OP - operator (used in critical section)
1872 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1873 // ------------------------------------------------------------------------
1874 // Routines for ATOMIC integer operands, other operators
1875 // ------------------------------------------------------------------------
1876 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1877 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1878 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1879 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1880 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1881 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1882 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1883 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1884 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1885 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1886 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1887 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1888 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1889 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1890 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1891 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1892 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1893 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1894 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1895 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1896 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1897 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1898 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1899 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1900 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1901 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1902 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1903 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1904 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1905 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1906 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1907 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1908 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1909 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1910 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1911 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1912 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1913 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1914 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1915 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1916 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1917 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1918 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1919 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1920 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1921 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1922 
1923 // ------------------------------------------------------------------------
1924 // Routines for C/C++ Reduction operators && and ||
1925 // ------------------------------------------------------------------------
1926 
1927 // -------------------------------------------------------------------------
1928 // Operation on *lhs, rhs bound by critical section
1929 // OP - operator (it's supposed to contain an assignment)
1930 // LCK_ID - lock identifier
1931 // Note: don't check gtid as it should always be valid
1932 // 1- and 2-byte operands - gtid is expected to be valid; other sizes - it is checked before this macro is used
1933 #define OP_CRITICAL_L_CPT(OP,LCK_ID) \
1934  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1935  \
1936  if( flag ) { \
1937  new_value OP rhs; \
1938  } else \
1939  new_value = (*lhs); \
1940  \
1941  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1942 
1943 // ------------------------------------------------------------------------
1944 #ifdef KMP_GOMP_COMPAT
1945 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
1946  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1947  KMP_CHECK_GTID; \
1948  OP_CRITICAL_L_CPT( OP, 0 ); \
1949  return new_value; \
1950  }
1951 #else
1952 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
1953 #endif /* KMP_GOMP_COMPAT */
1954 
1955 // ------------------------------------------------------------------------
1956 // Need separate macros for &&, || because C has no compound-assignment form of these operators
1957 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1958 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1959  TYPE new_value; \
1960  OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
1961  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1962 }
1963 
1964 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
1965 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
1966 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
1967 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
1968 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
1969 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
1970 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
1971 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
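// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not part of the library build): since C
// has no "&&=" / "||=" compound assignment, the logical captures get their
// own entry points; the flag argument keeps the same before/after meaning as
// for the arithmetic captures. Names and loc/gtid handling are illustrative
// only.
#if 0
static kmp_int32 atomic_andl_capture_sketch( ident_t *loc, int gtid,
                                             kmp_int32 *x, kmp_int32 expr )
{
    // { x = x && expr; v = x; }
    return __kmpc_atomic_fixed4_andl_cpt( loc, gtid, x, expr, 1 );
}
#endif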
1972 
1973 
1974 // -------------------------------------------------------------------------
1975 // Routines for Fortran operators that have no direct counterpart in C:
1976 // MAX, MIN, .EQV., .NEQV.
1977 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
1978 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
1979 // -------------------------------------------------------------------------
1980 
1981 // -------------------------------------------------------------------------
1982 // MIN and MAX need separate macros
1983 // OP - comparison operator used to check whether any action is needed
1984 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
1985  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1986  \
1987  if ( *lhs OP rhs ) { /* still need actions? */ \
1988  old_value = *lhs; \
1989  *lhs = rhs; \
1990  if ( flag ) \
1991  new_value = rhs; \
1992  else \
1993  new_value = old_value; \
1994  } \
1995  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1996  return new_value; \
1997 
1998 // -------------------------------------------------------------------------
1999 #ifdef KMP_GOMP_COMPAT
2000 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
2001  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
2002  KMP_CHECK_GTID; \
2003  MIN_MAX_CRITSECT_CPT( OP, 0 ); \
2004  }
2005 #else
2006 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
2007 #endif /* KMP_GOMP_COMPAT */
2008 
2009 // -------------------------------------------------------------------------
2010 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2011  { \
2012  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2013  /*TYPE old_value; */ \
2014  temp_val = *lhs; \
2015  old_value = temp_val; \
2016  while ( old_value OP rhs && /* still need actions? */ \
2017  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2018  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2019  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2020  { \
2021  KMP_CPU_PAUSE(); \
2022  temp_val = *lhs; \
2023  old_value = temp_val; \
2024  } \
2025  if( flag ) \
2026  return rhs; \
2027  else \
2028  return old_value; \
2029  }
2030 
2031 // -------------------------------------------------------------------------
2032 // 1-byte, 2-byte operands - use critical section
2033 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2034 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2035  TYPE new_value, old_value; \
2036  if ( *lhs OP rhs ) { /* need actions? */ \
2037  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2038  MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2039  } \
2040  return *lhs; \
2041 }
2042 
2043 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2044 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2045  TYPE new_value, old_value; \
2046  if ( *lhs OP rhs ) { \
2047  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2048  MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2049  } \
2050  return *lhs; \
2051 }
2052 
2053 
2054 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2055 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2056 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2057 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2058 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2059 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2060 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2061 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2062 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2063 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2064 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2065 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2066 #if KMP_HAVE_QUAD
2067 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2068 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2069 #if ( KMP_ARCH_X86 )
2070  MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2071  MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2072 #endif
2073 #endif
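// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not part of the library build): the
// MIN/MAX captures only store when the incoming value wins the comparison,
// and the return value follows the usual flag convention (value after the
// possible update when flag != 0, value before it otherwise). Names and
// loc/gtid handling are illustrative only.
#if 0
static kmp_int32 atomic_max_capture_sketch( ident_t *loc, int gtid,
                                            kmp_int32 *x, kmp_int32 expr )
{
    // { x = max(x, expr); v = x; }
    return __kmpc_atomic_fixed4_max_cpt( loc, gtid, x, expr, 1 );
}
#endif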
2074 
2075 // ------------------------------------------------------------------------
2076 #ifdef KMP_GOMP_COMPAT
2077 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
2078  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2079  KMP_CHECK_GTID; \
2080  OP_CRITICAL_CPT( OP, 0 ); \
2081  }
2082 #else
2083 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2084 #endif /* KMP_GOMP_COMPAT */
2085 // ------------------------------------------------------------------------
2086 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2087 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2088  TYPE new_value; \
2089  OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
2090  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2091 }
2092 
2093 // ------------------------------------------------------------------------
2094 
2095 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2096 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2097 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2098 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2099 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2100 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2101 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2102 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
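// ------------------------------------------------------------------------
// Illustrative identity (not part of the library build) behind the "^~"
// operator passed to ATOMIC_CMPX_EQV_CPT: Fortran .EQV. is the bitwise
// complement of .NEQV. (XOR), and for any integers a and b
//     a ^ ~b == ~(a ^ b)
// so feeding the two tokens "^ ~" into the XOR-style cmpxchg loop computes
// the equivalence update.
#if 0
#include <assert.h>
static void eqv_identity_check( int a, int b )
{
    assert( ( a ^ ~b ) == ~( a ^ b ) );
}
#endif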
2103 
2104 // ------------------------------------------------------------------------
2105 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2106 // TYPE_ID, OP_ID, TYPE - detailed above
2107 // OP - operator
2108 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2109 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2110 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2111  TYPE new_value; \
2112  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2113  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2114 }
2115 
2116 // ------------------------------------------------------------------------
2117 
2118 // Workaround for cmplx4. Regular routines with return value don't work
2119 // on Win_32e. Let's return captured values through the additional parameter.
2120 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
2121  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2122  \
2123  if( flag ) { \
2124  (*lhs) OP rhs; \
2125  (*out) = (*lhs); \
2126  } else { \
2127  (*out) = (*lhs); \
2128  (*lhs) OP rhs; \
2129  } \
2130  \
2131  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2132  return;
2133 // ------------------------------------------------------------------------
2134 
2135 #ifdef KMP_GOMP_COMPAT
2136 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
2137  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2138  KMP_CHECK_GTID; \
2139  OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
2140  }
2141 #else
2142 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2143 #endif /* KMP_GOMP_COMPAT */
2144 // ------------------------------------------------------------------------
2145 
2146 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2147 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2148 { \
2149  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2150  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2151 // ------------------------------------------------------------------------
2152 
2153 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2154 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2155  OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
2156  OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
2157 }
2158 // The end of workaround for cmplx4
2159 
2160 /* ------------------------------------------------------------------------- */
2161 // routines for long double type
2162 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2163 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2164 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2165 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2166 #if KMP_HAVE_QUAD
2167 // routines for _Quad type
2168 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2169 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2170 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2171 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2172 #if ( KMP_ARCH_X86 )
2173  ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2174  ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2175  ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2176  ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2177 #endif
2178 #endif
2179 
2180 // routines for complex types
2181 
2182 // cmplx4 routines to return void
2183 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2184 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2185 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2186 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2187 
2188 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2189 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2190 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2191 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2192 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2193 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2194 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2195 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2196 #if KMP_HAVE_QUAD
2197 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2198 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2199 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2200 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2201 #if ( KMP_ARCH_X86 )
2202  ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2203  ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2204  ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2205  ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2206 #endif
2207 #endif
2208 
2209 #if OMP_40_ENABLED
2210 
2211 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations.
2212 // Supported only on IA-32 architecture and Intel(R) 64
2213 
2214 // -------------------------------------------------------------------------
2215 // Operation on *lhs, rhs bound by critical section
2216 // OP - operator (it's supposed to contain an assignment)
2217 // LCK_ID - lock identifier
2218 // Note: don't check gtid as it should always be valid
2219 // 1- and 2-byte operands - gtid is expected to be valid; other sizes - it is checked before this macro is used
2220 #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2221  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2222  \
2223  if( flag ) { \
2224  /*temp_val = (*lhs);*/\
2225  (*lhs) = (rhs) OP (*lhs); \
2226  new_value = (*lhs); \
2227  } else { \
2228  new_value = (*lhs);\
2229  (*lhs) = (rhs) OP (*lhs); \
2230  } \
2231  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2232  return new_value;
2233 
2234 // ------------------------------------------------------------------------
2235 #ifdef KMP_GOMP_COMPAT
2236 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
2237  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2238  KMP_CHECK_GTID; \
2239  OP_CRITICAL_CPT_REV( OP, 0 ); \
2240  }
2241 #else
2242 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
2243 #endif /* KMP_GOMP_COMPAT */
2244 
2245 // ------------------------------------------------------------------------
2246 // Operation on *lhs, rhs using "compare_and_store" routine
2247 // TYPE - operands' type
2248 // BITS - size in bits, used to distinguish low level calls
2249 // OP - operator
2250 // Note: temp_val introduced in order to force the compiler to read
2251 // *lhs only once (w/o it the compiler reads *lhs twice)
2252 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2253  { \
2254  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2255  TYPE old_value, new_value; \
2256  temp_val = *lhs; \
2257  old_value = temp_val; \
2258  new_value = rhs OP old_value; \
2259  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2260  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2261  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2262  { \
2263  KMP_CPU_PAUSE(); \
2264  \
2265  temp_val = *lhs; \
2266  old_value = temp_val; \
2267  new_value = rhs OP old_value; \
2268  } \
2269  if( flag ) { \
2270  return new_value; \
2271  } else \
2272  return old_value; \
2273  }
2274 
2275 // -------------------------------------------------------------------------
2276 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2277 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2278  TYPE new_value; \
2279  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2280  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2281  OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2282 }
2283 
2284 
2285 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
2286 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
2287 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
2288 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
2289 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
2290 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
2291 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
2292 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
2293 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
2294 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
2295 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
2296 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
2297 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
2298 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
2299 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
2300 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
2301 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
2302 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
2303 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
2304 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
2305 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
2306 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
2307 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
2308 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
2309 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
2310 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
2311 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
2312 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
2313 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
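// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not part of the library build): the
// *_cpt_rev routines implement the reversed operand order x = expr binop x,
// which only matters for the non-commutative operators listed above; flag
// keeps its before/after meaning. Names and loc/gtid handling are
// illustrative only.
#if 0
static kmp_int32 atomic_sub_rev_capture_sketch( ident_t *loc, int gtid,
                                                kmp_int32 *x, kmp_int32 expr )
{
    // { x = expr - x; v = x; }
    return __kmpc_atomic_fixed4_sub_cpt_rev( loc, gtid, x, expr, 1 );
}
#endif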
2314 
2315 
2316 // ------------------------------------------------------------------------
2317 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2318 // TYPE_ID, OP_ID, TYPE - detailed above
2319 // OP - operator
2320 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2321 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2322 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2323  TYPE new_value; \
2324  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2325  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
2326  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2327  OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2328 }
2329 
2330 
2331 /* ------------------------------------------------------------------------- */
2332 // routines for long double type
2333 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
2334 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
2335 #if KMP_HAVE_QUAD
2336 // routines for _Quad type
2337 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
2338 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
2339 #if ( KMP_ARCH_X86 )
2340  ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
2341  ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
2342 #endif
2343 #endif
2344 
2345 // routines for complex types
2346 
2347 // ------------------------------------------------------------------------
2348 
2349 // Workaround for cmplx4. Regular routines with return value don't work
2350 // on Win_32e. Let's return captured values through the additional parameter.
2351 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2352  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2353  \
2354  if( flag ) { \
2355  (*lhs) = (rhs) OP (*lhs); \
2356  (*out) = (*lhs); \
2357  } else { \
2358  (*out) = (*lhs); \
2359  (*lhs) = (rhs) OP (*lhs); \
2360  } \
2361  \
2362  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2363  return;
2364 // ------------------------------------------------------------------------
2365 
2366 #ifdef KMP_GOMP_COMPAT
2367 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
2368  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2369  KMP_CHECK_GTID; \
2370  OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
2371  }
2372 #else
2373 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
2374 #endif /* KMP_GOMP_COMPAT */
2375 // ------------------------------------------------------------------------
2376 
2377 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2378 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2379  OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
2380  OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2381 }
2382 // The end of workaround for cmplx4
2383 
2384 
2385 // !!! TODO: check if we need to return void for cmplx4 routines
2386 // cmplx4 routines to return void
2387 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
2388 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
2389 
2390 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
2391 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
2392 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
2393 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
2394 #if KMP_HAVE_QUAD
2395 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
2396 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
2397 #if ( KMP_ARCH_X86 )
2398  ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2399  ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2400 #endif
2401 #endif
2402 
2403 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
2404 
2405 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2406 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
2407 { \
2408  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2409  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2410 
2411 #define CRITICAL_SWP(LCK_ID) \
2412  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2413  \
2414  old_value = (*lhs); \
2415  (*lhs) = rhs; \
2416  \
2417  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2418  return old_value;
2419 
2420 // ------------------------------------------------------------------------
2421 #ifdef KMP_GOMP_COMPAT
2422 #define GOMP_CRITICAL_SWP(FLAG) \
2423  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2424  KMP_CHECK_GTID; \
2425  CRITICAL_SWP( 0 ); \
2426  }
2427 #else
2428 #define GOMP_CRITICAL_SWP(FLAG)
2429 #endif /* KMP_GOMP_COMPAT */
2430 
2431 
2432 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2433 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2434  TYPE old_value; \
2435  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2436  old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
2437  return old_value; \
2438 }
2439 // ------------------------------------------------------------------------
2440 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2441 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2442  TYPE old_value; \
2443  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2444  old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
2445  return old_value; \
2446 }
2447 
2448 // ------------------------------------------------------------------------
2449 #define CMPXCHG_SWP(TYPE,BITS) \
2450  { \
2451  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2452  TYPE old_value, new_value; \
2453  temp_val = *lhs; \
2454  old_value = temp_val; \
2455  new_value = rhs; \
2456  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2457  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2458  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2459  { \
2460  KMP_CPU_PAUSE(); \
2461  \
2462  temp_val = *lhs; \
2463  old_value = temp_val; \
2464  new_value = rhs; \
2465  } \
2466  return old_value; \
2467  }
2468 
2469 // -------------------------------------------------------------------------
2470 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2471 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2472  TYPE old_value; \
2473  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2474  CMPXCHG_SWP(TYPE,BITS) \
2475 }
2476 
2477 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
2478 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
2479 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
2480 
2481 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
2482 
2483 #if ( KMP_ARCH_X86 )
2484  ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2485  ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2486 #else
2487  ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2488  ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2489 #endif
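// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not part of the library build): the swap
// routines return the value that was in the location before the new value
// was stored. Names and loc/gtid handling are illustrative only.
#if 0
static kmp_int32 atomic_swap_lowering_sketch( ident_t *loc, int gtid,
                                              kmp_int32 *x, kmp_int32 expr )
{
    // { v = x; x = expr; }
    return __kmpc_atomic_fixed4_swp( loc, gtid, x, expr );
}
#endif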
2490 
2491 // ------------------------------------------------------------------------
2492 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2493 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
2494 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2495  TYPE old_value; \
2496  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2497  CRITICAL_SWP(LCK_ID) \
2498 }
2499 
2500 // ------------------------------------------------------------------------
2501 
2502 // !!! TODO: check if we need to return void for cmplx4 routines
2503 // Workaround for cmplx4. Regular routines with return value don't work
2504 // on Win_32e. Let's return captured values through the additional parameter.
2505 
2506 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2507 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
2508 { \
2509  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2510  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2511 
2512 
2513 #define CRITICAL_SWP_WRK(LCK_ID) \
2514  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2515  \
2516  tmp = (*lhs); \
2517  (*lhs) = (rhs); \
2518  (*out) = tmp; \
2519  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2520  return;
2521 
2522 // ------------------------------------------------------------------------
2523 
2524 #ifdef KMP_GOMP_COMPAT
2525 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
2526  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2527  KMP_CHECK_GTID; \
2528  CRITICAL_SWP_WRK( 0 ); \
2529  }
2530 #else
2531 #define GOMP_CRITICAL_SWP_WRK(FLAG)
2532 #endif /* KMP_GOMP_COMPAT */
2533 // ------------------------------------------------------------------------
2534 
2535 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
2536 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2537  TYPE tmp; \
2538  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
2539  CRITICAL_SWP_WRK(LCK_ID) \
2540 }
2541 // The end of workaround for cmplx4
2542 
2543 
2544 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
2545 #if KMP_HAVE_QUAD
2546 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
2547 #endif
2548 // cmplx4 routine to return void
2549 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2550 
2551 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2552 
2553 
2554 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
2555 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
2556 #if KMP_HAVE_QUAD
2557 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
2558 #if ( KMP_ARCH_X86 )
2559  ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
2560  ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
2561 #endif
2562 #endif
2563 
2564 
2565 // End of OpenMP 4.0 Capture
2566 
2567 #endif //OMP_40_ENABLED
2568 
2569 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2570 
2571 
2572 #undef OP_CRITICAL
2573 
2574 /* ------------------------------------------------------------------------ */
2575 /* Generic atomic routines */
2576 /* ------------------------------------------------------------------------ */
2577 
2578 void
2579 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2580 {
2581  KMP_DEBUG_ASSERT( __kmp_init_serial );
2582 
2583  if (
2584 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2585  FALSE /* must use lock */
2586 #else
2587  TRUE
2588 #endif
2589  )
2590  {
2591  kmp_int8 old_value, new_value;
2592 
2593  old_value = *(kmp_int8 *) lhs;
2594  (*f)( &new_value, &old_value, rhs );
2595 
2596  /* TODO: Should this be acquire or release? */
2597  while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2598  *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2599  {
2600  KMP_CPU_PAUSE();
2601 
2602  old_value = *(kmp_int8 *) lhs;
2603  (*f)( &new_value, &old_value, rhs );
2604  }
2605 
2606  return;
2607  }
2608  else {
2609  //
2610  // All 1-byte data is of integer data type.
2611  //
2612 
2613 #ifdef KMP_GOMP_COMPAT
2614  if ( __kmp_atomic_mode == 2 ) {
2615  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2616  }
2617  else
2618 #endif /* KMP_GOMP_COMPAT */
2619  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2620 
2621  (*f)( lhs, lhs, rhs );
2622 
2623 #ifdef KMP_GOMP_COMPAT
2624  if ( __kmp_atomic_mode == 2 ) {
2625  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2626  }
2627  else
2628 #endif /* KMP_GOMP_COMPAT */
2629  __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2630  }
2631 }
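// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not part of the library build): the
// generic routines take a helper of the form f( out, old, rhs ) that computes
// "*out = *old op *rhs"; the runtime retries it inside the cmpxchg loop above
// (or runs it under a lock in GOMP-compatibility mode). The saturating-add
// helper below is purely illustrative.
#if 0
static void sat_add_i8( void *out, void *old, void *rhs )
{
    int v = *(signed char *) old + *(signed char *) rhs;
    if ( v > 127 )  v = 127;
    if ( v < -128 ) v = -128;
    *(signed char *) out = (signed char) v;
}
static void generic_atomic_1_sketch( ident_t *loc, int gtid, signed char *x, signed char rhs )
{
    __kmpc_atomic_1( loc, gtid, x, &rhs, sat_add_i8 );
}
#endif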
2632 
2633 void
2634 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2635 {
2636  if (
2637 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2638  FALSE /* must use lock */
2639 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2640  TRUE /* no alignment problems */
2641 #else
2642  ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
2643 #endif
2644  )
2645  {
2646  kmp_int16 old_value, new_value;
2647 
2648  old_value = *(kmp_int16 *) lhs;
2649  (*f)( &new_value, &old_value, rhs );
2650 
2651  /* TODO: Should this be acquire or release? */
2652  while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2653  *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2654  {
2655  KMP_CPU_PAUSE();
2656 
2657  old_value = *(kmp_int16 *) lhs;
2658  (*f)( &new_value, &old_value, rhs );
2659  }
2660 
2661  return;
2662  }
2663  else {
2664  //
2665  // All 2-byte data is of integer data type.
2666  //
2667 
2668 #ifdef KMP_GOMP_COMPAT
2669  if ( __kmp_atomic_mode == 2 ) {
2670  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2671  }
2672  else
2673 #endif /* KMP_GOMP_COMPAT */
2674  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2675 
2676  (*f)( lhs, lhs, rhs );
2677 
2678 #ifdef KMP_GOMP_COMPAT
2679  if ( __kmp_atomic_mode == 2 ) {
2680  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2681  }
2682  else
2683 #endif /* KMP_GOMP_COMPAT */
2684  __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2685  }
2686 }
2687 
2688 void
2689 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2690 {
2691  KMP_DEBUG_ASSERT( __kmp_init_serial );
2692 
2693  if (
2694  //
2695  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
2696  // Gomp compatibility is broken if this routine is called for floats.
2697  //
2698 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2699  TRUE /* no alignment problems */
2700 #else
2701  ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
2702 #endif
2703  )
2704  {
2705  kmp_int32 old_value, new_value;
2706 
2707  old_value = *(kmp_int32 *) lhs;
2708  (*f)( &new_value, &old_value, rhs );
2709 
2710  /* TODO: Should this be acquire or release? */
2711  while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2712  *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2713  {
2714  KMP_CPU_PAUSE();
2715 
2716  old_value = *(kmp_int32 *) lhs;
2717  (*f)( &new_value, &old_value, rhs );
2718  }
2719 
2720  return;
2721  }
2722  else {
2723  //
2724  // Use __kmp_atomic_lock_4i for all 4-byte data,
2725  // even if it isn't of integer data type.
2726  //
2727 
2728 #ifdef KMP_GOMP_COMPAT
2729  if ( __kmp_atomic_mode == 2 ) {
2730  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2731  }
2732  else
2733 #endif /* KMP_GOMP_COMPAT */
2734  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2735 
2736  (*f)( lhs, lhs, rhs );
2737 
2738 #ifdef KMP_GOMP_COMPAT
2739  if ( __kmp_atomic_mode == 2 ) {
2740  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2741  }
2742  else
2743 #endif /* KMP_GOMP_COMPAT */
2744  __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2745  }
2746 }
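/*
 * Illustrative sketch (assumed names): the routine only ever manipulates raw
 * 4-byte patterns, so the same entry point also serves kmp_real32 operands --
 * the callback supplies the type-aware arithmetic while the compare-and-store
 * (or the 4i lock) works on the bits.  Subject to the GOMP FIXME above, a
 * float multiply could be driven like this:
 *
 *     static void __example_mul_real32( void *out, void *lhs, void *rhs )
 *     {
 *         *(kmp_real32 *) out = *(kmp_real32 *) lhs * *(kmp_real32 *) rhs;
 *     }
 *
 *     kmp_real32 factor = 0.5f;
 *     __kmpc_atomic_4( loc, __kmp_entry_gtid(), target, &factor, __example_mul_real32 );
 */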
2747 
2748 void
2749 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2750 {
2751  KMP_DEBUG_ASSERT( __kmp_init_serial );
2752  if (
2753 
2754 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2755  FALSE /* must use lock */
2756 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2757  TRUE /* no alignment problems */
2758 #else
2759  ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
2760 #endif
2761  )
2762  {
2763  kmp_int64 old_value, new_value;
2764 
2765  old_value = *(kmp_int64 *) lhs;
2766  (*f)( &new_value, &old_value, rhs );
2767  /* TODO: Should this be acquire or release? */
2768  while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2769  *(kmp_int64 *) &old_value,
2770  *(kmp_int64 *) &new_value ) )
2771  {
2772  KMP_CPU_PAUSE();
2773 
2774  old_value = *(kmp_int64 *) lhs;
2775  (*f)( &new_value, &old_value, rhs );
2776  }
2777 
2778  return;
2779  } else {
2780  //
2781  // Use __kmp_atomic_lock_8i for all 8-byte data,
2782  // even if it isn't of integer data type.
2783  //
2784 
2785 #ifdef KMP_GOMP_COMPAT
2786  if ( __kmp_atomic_mode == 2 ) {
2787  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2788  }
2789  else
2790 #endif /* KMP_GOMP_COMPAT */
2791  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2792 
2793  (*f)( lhs, lhs, rhs );
2794 
2795 #ifdef KMP_GOMP_COMPAT
2796  if ( __kmp_atomic_mode == 2 ) {
2797  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2798  }
2799  else
2800 #endif /* KMP_GOMP_COMPAT */
2801  __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2802  }
2803 }
2804 
2805 void
2806 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2807 {
2808  KMP_DEBUG_ASSERT( __kmp_init_serial );
2809 
2810 #ifdef KMP_GOMP_COMPAT
2811  if ( __kmp_atomic_mode == 2 ) {
2812  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2813  }
2814  else
2815 #endif /* KMP_GOMP_COMPAT */
2816  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2817 
2818  (*f)( lhs, lhs, rhs );
2819 
2820 #ifdef KMP_GOMP_COMPAT
2821  if ( __kmp_atomic_mode == 2 ) {
2822  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2823  }
2824  else
2825 #endif /* KMP_GOMP_COMPAT */
2826  __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2827 }
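/*
 * Illustrative sketch (assumed names): the 10-byte (long double) case has no
 * compare-and-store fallback, so the update always runs under
 * __kmp_atomic_lock_10r and the callback writes its result straight back
 * through the lhs pointer:
 *
 *     static void __example_add_real80( void *out, void *lhs, void *rhs )
 *     {
 *         *(long double *) out = *(long double *) lhs + *(long double *) rhs;
 *     }
 *
 *     long double addend = 1.0L;
 *     __kmpc_atomic_10( loc, __kmp_entry_gtid(), target, &addend, __example_add_real80 );
 */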
2828 
2829 void
2830 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2831 {
2832  KMP_DEBUG_ASSERT( __kmp_init_serial );
2833 
2834 #ifdef KMP_GOMP_COMPAT
2835  if ( __kmp_atomic_mode == 2 ) {
2836  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2837  }
2838  else
2839 #endif /* KMP_GOMP_COMPAT */
2840  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2841 
2842  (*f)( lhs, lhs, rhs );
2843 
2844 #ifdef KMP_GOMP_COMPAT
2845  if ( __kmp_atomic_mode == 2 ) {
2846  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2847  }
2848  else
2849 #endif /* KMP_GOMP_COMPAT */
2850  __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2851 }
2852 
2853 void
2854 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2855 {
2856  KMP_DEBUG_ASSERT( __kmp_init_serial );
2857 
2858 #ifdef KMP_GOMP_COMPAT
2859  if ( __kmp_atomic_mode == 2 ) {
2860  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2861  }
2862  else
2863 #endif /* KMP_GOMP_COMPAT */
2864  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2865 
2866  (*f)( lhs, lhs, rhs );
2867 
2868 #ifdef KMP_GOMP_COMPAT
2869  if ( __kmp_atomic_mode == 2 ) {
2870  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2871  }
2872  else
2873 #endif /* KMP_GOMP_COMPAT */
2874  __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2875 }
2876 
2877 void
2878 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2879 {
2880  KMP_DEBUG_ASSERT( __kmp_init_serial );
2881 
2882 #ifdef KMP_GOMP_COMPAT
2883  if ( __kmp_atomic_mode == 2 ) {
2884  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2885  }
2886  else
2887 #endif /* KMP_GOMP_COMPAT */
2888  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2889 
2890  (*f)( lhs, lhs, rhs );
2891 
2892 #ifdef KMP_GOMP_COMPAT
2893  if ( __kmp_atomic_mode == 2 ) {
2894  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2895  }
2896  else
2897 #endif /* KMP_GOMP_COMPAT */
2898  __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2899 }
2900 
2901 // AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler
2903 // duplicated in order not to use third-party names in pure Intel code
2903 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
2904 void
2905 __kmpc_atomic_start(void)
2906 {
2907  int gtid = __kmp_entry_gtid();
2908  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
2909  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
2910 }
2911 
2912 
2913 void
2914 __kmpc_atomic_end(void)
2915 {
2916  int gtid = __kmp_get_gtid();
2917  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
2918  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
2919 }
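/*
 * Illustrative sketch: when the compiler cannot map an atomic construct onto
 * one of the typed entry points above, it can bracket the update with these
 * two calls, in the same way GOMP_atomic_start/end would be used:
 *
 *     __kmpc_atomic_start();
 *     *target = *target + addend;   // arbitrary user update, any type
 *     __kmpc_atomic_end();
 *
 * "target" and "addend" stand for whatever the user's atomic statement
 * references; every region guarded this way serializes on __kmp_atomic_lock.
 */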
2920 
2921 /* ------------------------------------------------------------------------ */
2922 /* ------------------------------------------------------------------------ */
2927 // end of file