Intel® OpenMP* Runtime Library
kmp_atomic.c
1 /*
2  * kmp_atomic.c -- ATOMIC implementation routines
3  * $Revision: 43421 $
4  * $Date: 2014-08-28 08:56:10 -0500 (Thu, 28 Aug 2014) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2014 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp_atomic.h"
38 #include "kmp.h" // TRUE, asm routines prototypes
39 
40 typedef unsigned char uchar;
41 typedef unsigned short ushort;
42 
560 /*
561  * Global vars
562  */
563 
564 #ifndef KMP_GOMP_COMPAT
565 int __kmp_atomic_mode = 1; // Intel perf
566 #else
567 int __kmp_atomic_mode = 2; // GOMP compatibility
568 #endif /* KMP_GOMP_COMPAT */
569 
570 KMP_ALIGN(128)
571 
572 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
573 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
574 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
575 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
576 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
577 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
578 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
579 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */
580 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
581 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
582 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
583 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
584 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
585 
586 
587 /*
588  2007-03-02:
589  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
590  bug on *_32 and *_32e. This is just a temporary workaround for the problem.
591  It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
592  routines in assembler language.
593 */
594 #define KMP_ATOMIC_VOLATILE volatile
595 
596 #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
597 
598  static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
599  static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
600  static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
601  static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
602  static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
603  static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
604 
605  static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
606  static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
607  static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
608  static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
609  static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
610  static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
611 
612  static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
613  static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
614  static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
615  static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
616 
617  static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
618  static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
619  static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
620  static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
621 
622 #endif
623 
624 /* ------------------------------------------------------------------------ */
625 /* ATOMIC implementation routines */
626 /* one routine for each operation and operand type */
627 /* ------------------------------------------------------------------------ */
628 
629 // All routine declarations look like
630 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
631 // ------------------------------------------------------------------------
632 
633 #define KMP_CHECK_GTID \
634  if ( gtid == KMP_GTID_UNKNOWN ) { \
635  gtid = __kmp_entry_gtid(); \
636  } // check and get gtid when needed
637 
638 // Beginning of a definition (provides name, parameters, debug trace)
639 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
640 // OP_ID - operation identifier (add, sub, mul, ...)
641 // TYPE - operands' type
642 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
643 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
644 { \
645  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
646  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
647 
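// ------------------------------------------------------------------------
// For illustration only (not an additional entry point): with TYPE_ID=fixed4,
// OP_ID=add, TYPE=kmp_int32 and RET_TYPE=void, ATOMIC_BEGIN opens roughly
//
//   void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid,
//                                  kmp_int32 *lhs, kmp_int32 rhs )
//   {
//       KMP_DEBUG_ASSERT( __kmp_init_serial );
//       KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid ));
//       ... // body and closing brace supplied by the instantiating macro
//
// and a compiler translating "#pragma omp atomic" over "x += y" (kmp_int32 x)
// is expected to emit a call such as
//   __kmpc_atomic_fixed4_add( &loc, gtid, &x, y );
// where loc, x, y are placeholder names and gtid is the caller's global
// thread id.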
648 // ------------------------------------------------------------------------
649 // Lock variables used for critical sections for various size operands
650 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
651 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
652 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
653 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
654 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
655 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
656 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
657 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
658 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
659 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
660 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
661 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
662 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
663 
664 // ------------------------------------------------------------------------
665 // Operation on *lhs, rhs bound by critical section
666 // OP - operator (it's supposed to contain an assignment)
667 // LCK_ID - lock identifier
668 // Note: don't check gtid as it should always be valid
669 // 1- and 2-byte routines expect a valid gtid; other sizes check it before this macro
670 #define OP_CRITICAL(OP,LCK_ID) \
671  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
672  \
673  (*lhs) OP (rhs); \
674  \
675  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
676 
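// ------------------------------------------------------------------------
// For illustration only: OP_CRITICAL(+=,10r) inside a float10 routine expands
// (roughly) to
//
//   __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
//
// i.e. the lock selected by LCK_ID serializes all updates of that operand
// class, and the OP argument must carry the assignment ("+=", "-=", ...).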
677 // ------------------------------------------------------------------------
678 // For GNU compatibility, we may need to use a critical section,
679 // even though it is not required by the ISA.
680 //
681 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
682 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
683 // critical section. On Intel(R) 64, all atomic operations are done with fetch
684 // and add or compare and exchange. Therefore, the FLAG parameter to this
685 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
686 // require a critical section, where we predict that they will be implemented
687 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
688 //
689 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
690 // the FLAG parameter should always be 1. If we know that we will be using
691 // a critical section, then we want to make certain that we use the generic
692 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
693 // locks that are specialized based upon the size or type of the data.
694 //
695 // If FLAG is 0, then we are relying on dead code elimination by the build
696 // compiler to get rid of the useless block of code, and save a needless
697 // branch at runtime.
698 //
699 
700 #ifdef KMP_GOMP_COMPAT
701 # define OP_GOMP_CRITICAL(OP,FLAG) \
702  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
703  KMP_CHECK_GTID; \
704  OP_CRITICAL( OP, 0 ); \
705  return; \
706  }
707 # else
708 # define OP_GOMP_CRITICAL(OP,FLAG)
709 #endif /* KMP_GOMP_COMPAT */
710 
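// ------------------------------------------------------------------------
// For illustration only: in a KMP_GOMP_COMPAT build, in a routine generated
// with a non-zero GOMP_FLAG, OP_GOMP_CRITICAL(+=,GOMP_FLAG) expands (roughly) to
//
//   if ( (GOMP_FLAG) && (__kmp_atomic_mode == 2) ) {
//       if ( gtid == KMP_GTID_UNKNOWN ) { gtid = __kmp_entry_gtid(); }
//       __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
//       (*lhs) += (rhs);
//       __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
//       return;
//   }
//
// so in GOMP compatibility mode (__kmp_atomic_mode == 2) the update funnels
// through the single generic lock, and the lock-free code below the macro is
// reached only in Intel mode or when the FLAG argument is 0.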
711 #if KMP_MIC
712 # define KMP_DO_PAUSE _mm_delay_32( 1 )
713 #else
714 # define KMP_DO_PAUSE KMP_CPU_PAUSE()
715 #endif /* KMP_MIC */
716 
717 // ------------------------------------------------------------------------
718 // Operation on *lhs, rhs using "compare_and_store" routine
719 // TYPE - operands' type
720 // BITS - size in bits, used to distinguish low level calls
721 // OP - operator
722 #define OP_CMPXCHG(TYPE,BITS,OP) \
723  { \
724  TYPE old_value, new_value; \
725  old_value = *(TYPE volatile *)lhs; \
726  new_value = old_value OP rhs; \
727  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
728  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
729  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
730  { \
731  KMP_DO_PAUSE; \
732  \
733  old_value = *(TYPE volatile *)lhs; \
734  new_value = old_value OP rhs; \
735  } \
736  }
737 
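// ------------------------------------------------------------------------
// For illustration only: OP_CMPXCHG(kmp_real32,32,+), the lock-free core of
// __kmpc_atomic_float4_add, reads as
//
//   kmp_real32 old_value, new_value;
//   old_value = *(kmp_real32 volatile *)lhs;
//   new_value = old_value + rhs;
//   while ( ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
//                   *VOLATILE_CAST(kmp_int32 *) &old_value,
//                   *VOLATILE_CAST(kmp_int32 *) &new_value ) )
//   {
//       KMP_DO_PAUSE;                              // back off briefly
//       old_value = *(kmp_real32 volatile *)lhs;   // re-read the target
//       new_value = old_value + rhs;               // recompute the update
//   }
//
// The floating point value is compared and stored through its 32-bit integer
// image, so the loop completes exactly when no other thread modified *lhs
// between the read and the compare-and-store.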
738 #if USE_CMPXCHG_FIX
739 // 2007-06-25:
740 // workaround for C78287 (complex(kind=4) data type)
741 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
742 // Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
743 // This is a problem of the compiler.
744 // Related tracker is C76005, targeted to 11.0.
745 // I verified the asm of the workaround.
746 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
747  { \
748  char anonym[ ( sizeof( TYPE ) == sizeof( kmp_int##BITS ) ) ? ( 1 ) : ( 0 ) ] = { 1 }; \
749  struct _sss { \
750  TYPE cmp; \
751  kmp_int##BITS *vvv; \
752  }; \
753  struct _sss old_value, new_value; \
754  old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
755  new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
756  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
757  new_value.cmp = old_value.cmp OP rhs; \
758  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
759  *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
760  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
761  { \
762  KMP_DO_PAUSE; \
763  \
764  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
765  new_value.cmp = old_value.cmp OP rhs; \
766  } \
767  }
768 // end of the first part of the workaround for C78287
769 #endif // USE_CMPXCHG_FIX
770 
771 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
772 
773 // ------------------------------------------------------------------------
774 // X86 or X86_64: no alignment problems ====================================
775 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
776 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
777  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
778  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
779  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
780 }
781 // -------------------------------------------------------------------------
782 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
783 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
784  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
785  OP_CMPXCHG(TYPE,BITS,OP) \
786 }
787 #if USE_CMPXCHG_FIX
788 // -------------------------------------------------------------------------
789 // workaround for C78287 (complex(kind=4) data type)
790 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
791 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
792  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
793  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
794 }
795 // end of the second part of the workaround for C78287
796 #endif
797 
798 #else
799 // -------------------------------------------------------------------------
800 // Code for other architectures that don't handle unaligned accesses.
801 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
802 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
803  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
804  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
805  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
806  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
807  } else { \
808  KMP_CHECK_GTID; \
809  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
810  } \
811 }
812 // -------------------------------------------------------------------------
813 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
814 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
815  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
816  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
817  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
818  } else { \
819  KMP_CHECK_GTID; \
820  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
821  } \
822 }
823 #if USE_CMPXCHG_FIX
824 // -------------------------------------------------------------------------
825 // workaround for C78287 (complex(kind=4) data type)
826 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
827 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
828  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
829  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
830  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
831  } else { \
832  KMP_CHECK_GTID; \
833  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
834  } \
835 }
836 // end of the second part of the workaround for C78287
837 #endif // USE_CMPXCHG_FIX
838 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
839 
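// ------------------------------------------------------------------------
// For illustration only: on the non-x86 path above, MASK is the alignment
// mask of the operand (0 for 1-byte, 1 for 2-byte, 3 for 4-byte, 7 for
// 8-byte in the instantiations below), so e.g. for an 8-byte operand
//
//   if ( ! ( (kmp_uintptr_t) lhs & 0x7 ) ) {
//       // naturally aligned: lock-free KMP_TEST_THEN_ADD64 / OP_CMPXCHG path
//   } else {
//       // unaligned: OP_CRITICAL fallback under __kmp_atomic_lock_8i
//   }
//
// keeps the lock-free path for naturally aligned addresses only.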
840 // Routines for ATOMIC 4-byte operands addition and subtraction
841 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
842 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
843 
844 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
845 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
846 
847 // Routines for ATOMIC 8-byte operands addition and subtraction
848 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
849 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
850 
851 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
852 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
853 
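// ------------------------------------------------------------------------
// For illustration only: on IA-32 architecture and Intel(R) 64, the core of
//   ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 )
// above is the single call
//
//   KMP_TEST_THEN_ADD64( lhs, - rhs );   // atomic fetch-and-add of -rhs
//
// i.e. the OP argument is used only as the sign of the addend, which is how
// subtraction is mapped onto the same fetch-and-add primitive. The float4 and
// float8 add/sub entries go through ATOMIC_CMPXCHG instead, because there is
// no hardware fetch-and-add for floating point operands.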
854 // ------------------------------------------------------------------------
855 // Entry definitions for integer operands
856 // TYPE_ID - operands type and size (fixed4, float4)
857 // OP_ID - operation identifier (add, sub, mul, ...)
858 // TYPE - operand type
859 // BITS - size in bits, used to distinguish low level calls
860 // OP - operator (used in critical section)
861 // LCK_ID - lock identifier, used to possibly distinguish lock variable
862 // MASK - used for alignment check
863 
864 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
865 // ------------------------------------------------------------------------
866 // Routines for ATOMIC integer operands, other operators
867 // ------------------------------------------------------------------------
868 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
869 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
870 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
871 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
872 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
873 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
874 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
875 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
876 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
877 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
878 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
879 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
880 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
881 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
882 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
883 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
884 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
885 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
886 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
887 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
888 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
889 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
890 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
891 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
892 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
893 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
894 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
895 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
896 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
897 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
898 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
899 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
900 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
901 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
902 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
903 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
904 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
905 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
906 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
907 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
908 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
909 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
910 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
911 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
912 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
913 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
914 
915 
916 /* ------------------------------------------------------------------------ */
917 /* Routines for C/C++ Reduction operators && and || */
918 /* ------------------------------------------------------------------------ */
919 
920 // ------------------------------------------------------------------------
921 // Need separate macros for && and || because they have no compound-assignment form
922 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
923 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
924 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
925  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
926  OP_CRITICAL( = *lhs OP, LCK_ID ) \
927 }
928 
929 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
930 
931 // ------------------------------------------------------------------------
932 // X86 or X86_64: no alignment problems ===================================
933 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
934 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
935  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
936  OP_CMPXCHG(TYPE,BITS,OP) \
937 }
938 
939 #else
940 // ------------------------------------------------------------------------
941 // Code for other architectures that don't handle unaligned accesses.
942 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
943 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
944  OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
945  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
946  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
947  } else { \
948  KMP_CHECK_GTID; \
949  OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
950  } \
951 }
952 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
953 
954 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
955 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
956 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
957 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
958 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
959 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
960 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
961 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
962 
963 
964 /* ------------------------------------------------------------------------- */
965 /* Routines for Fortran operators that have no C counterpart: */
966 /* MAX, MIN, .EQV., .NEQV. */
967 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
968 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
969 /* ------------------------------------------------------------------------- */
970 
971 // -------------------------------------------------------------------------
972 // MIN and MAX need separate macros
973 // OP - operator used to check whether any action is still needed
974 #define MIN_MAX_CRITSECT(OP,LCK_ID) \
975  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
976  \
977  if ( *lhs OP rhs ) { /* still need actions? */ \
978  *lhs = rhs; \
979  } \
980  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
981 
982 // -------------------------------------------------------------------------
983 #ifdef KMP_GOMP_COMPAT
984 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
985  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
986  KMP_CHECK_GTID; \
987  MIN_MAX_CRITSECT( OP, 0 ); \
988  return; \
989  }
990 #else
991 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
992 #endif /* KMP_GOMP_COMPAT */
993 
994 // -------------------------------------------------------------------------
995 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
996  { \
997  TYPE KMP_ATOMIC_VOLATILE temp_val; \
998  TYPE old_value; \
999  temp_val = *lhs; \
1000  old_value = temp_val; \
1001  while ( old_value OP rhs && /* still need actions? */ \
1002  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1003  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1004  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
1005  { \
1006  KMP_CPU_PAUSE(); \
1007  temp_val = *lhs; \
1008  old_value = temp_val; \
1009  } \
1010  }
1011 
1012 // -------------------------------------------------------------------------
1013 // 1-byte, 2-byte operands - use critical section
1014 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1015 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1016  if ( *lhs OP rhs ) { /* need actions? */ \
1017  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1018  MIN_MAX_CRITSECT(OP,LCK_ID) \
1019  } \
1020 }
1021 
1022 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1023 
1024 // -------------------------------------------------------------------------
1025 // X86 or X86_64: no alignment problems ====================================
1026 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1027 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1028  if ( *lhs OP rhs ) { \
1029  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1030  MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1031  } \
1032 }
1033 
1034 #else
1035 // -------------------------------------------------------------------------
1036 // Code for other architectures that don't handle unaligned accesses.
1037 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1038 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1039  if ( *lhs OP rhs ) { \
1040  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1041  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1042  MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1043  } else { \
1044  KMP_CHECK_GTID; \
1045  MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1046  } \
1047  } \
1048 }
1049 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1050 
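// ------------------------------------------------------------------------
// For illustration only: MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, ... )
// below produces a __kmpc_atomic_fixed4_max whose core is (roughly)
//
//   if ( *lhs < rhs ) {                       // only update if rhs is larger
//       kmp_int32 old_value = *lhs;
//       while ( old_value < rhs &&            // is the update still needed?
//               ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
//                       *VOLATILE_CAST(kmp_int32 *) &old_value,
//                       *VOLATILE_CAST(kmp_int32 *) &rhs ) )
//       {
//           KMP_CPU_PAUSE();
//           old_value = *lhs;                 // re-read and re-test
//       }
//   }
//
// The OP argument ("<" for max, ">" for min) is the condition under which the
// stored value still has to be replaced by rhs, so the loop exits as soon as
// either the store succeeds or another thread has already installed a value
// at least as good.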
1051 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1052 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1053 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1054 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1055 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1056 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1057 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1058 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1059 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1060 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1061 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1062 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1063 #if KMP_HAVE_QUAD
1064 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1065 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1066 #if ( KMP_ARCH_X86 )
1067  MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1068  MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1069 #endif
1070 #endif
1071 // ------------------------------------------------------------------------
1072 // Need separate macros for .EQV. because of the need for the complement (~)
1073 // OP is ignored for critical sections; ^=~ is used instead
1074 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1075 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1076  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1077  OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1078 }
1079 
1080 // ------------------------------------------------------------------------
1081 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1082 // ------------------------------------------------------------------------
1083 // X86 or X86_64: no alignment problems ===================================
1084 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1085 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1086  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1087  OP_CMPXCHG(TYPE,BITS,OP) \
1088 }
1089 // ------------------------------------------------------------------------
1090 #else
1091 // ------------------------------------------------------------------------
1092 // Code for other architectures that don't handle unaligned accesses.
1093 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1094 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1095  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1096  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1097  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1098  } else { \
1099  KMP_CHECK_GTID; \
1100  OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1101  } \
1102 }
1103 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1104 
1105 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1106 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1107 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1108 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1109 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1110 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1111 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1112 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1113 
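// ------------------------------------------------------------------------
// For illustration only: in the entries above, .NEQV. is a plain exclusive or,
// so the fixed4_neqv update is
//   new_value = old_value ^ rhs;
// while .EQV. needs the complement, so fixed4_eqv (OP is "^~") computes
//   new_value = old_value ^ ~rhs;
// and its critical-section fallback sends "^=~" as the assignment, which
// likewise expands to (*lhs) ^= ~(rhs).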
1114 
1115 // ------------------------------------------------------------------------
1116 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1117 // TYPE_ID, OP_ID, TYPE - detailed above
1118 // OP - operator
1119 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1120 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1121 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1122  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1123  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1124 }
1125 
1126 /* ------------------------------------------------------------------------- */
1127 // routines for long double type
1128 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1129 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1130 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1131 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
1132 #if KMP_HAVE_QUAD
1133 // routines for _Quad type
1134 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1135 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1136 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1137 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1138 #if ( KMP_ARCH_X86 )
1139  ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1140  ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1141  ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1142  ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1143 #endif
1144 #endif
1145 // routines for complex types
1146 
1147 #if USE_CMPXCHG_FIX
1148 // workaround for C78287 (complex(kind=4) data type)
1149 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1150 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1151 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1152 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1153 // end of the workaround for C78287
1154 #else
1155 ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
1156 ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
1157 ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
1158 ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
1159 #endif // USE_CMPXCHG_FIX
1160 
1161 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1162 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1163 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1164 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1165 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1166 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1167 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1168 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1169 #if KMP_HAVE_QUAD
1170 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1171 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1172 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1173 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1174 #if ( KMP_ARCH_X86 )
1175  ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1176  ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1177  ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1178  ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1179 #endif
1180 #endif
1181 
1182 #if OMP_40_ENABLED
1183 
1184 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1185 // Supported only on IA-32 architecture and Intel(R) 64
1186 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1187 
1188 // ------------------------------------------------------------------------
1189 // Operation on *lhs, rhs bound by critical section
1190 // OP - operator (it's supposed to contain an assignment)
1191 // LCK_ID - lock identifier
1192 // Note: don't check gtid as it should always be valid
1193 // 1- and 2-byte routines expect a valid gtid; other sizes check it before this macro
1194 #define OP_CRITICAL_REV(OP,LCK_ID) \
1195  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1196  \
1197  (*lhs) = (rhs) OP (*lhs); \
1198  \
1199  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1200 
1201 #ifdef KMP_GOMP_COMPAT
1202 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1203  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1204  KMP_CHECK_GTID; \
1205  OP_CRITICAL_REV( OP, 0 ); \
1206  return; \
1207  }
1208 #else
1209 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1210 #endif /* KMP_GOMP_COMPAT */
1211 
1212 
1213 // Beginning of a definition (provides name, parameters, debug trace)
1214 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
1215 // OP_ID - operation identifier (add, sub, mul, ...)
1216 // TYPE - operands' type
1217 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1218 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1219 { \
1220  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1221  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1222 
1223 // ------------------------------------------------------------------------
1224 // Operation on *lhs, rhs using "compare_and_store" routine
1225 // TYPE - operands' type
1226 // BITS - size in bits, used to distinguish low level calls
1227 // OP - operator
1228 // Note: temp_val introduced in order to force the compiler to read
1229 // *lhs only once (w/o it the compiler reads *lhs twice)
1230 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1231  { \
1232  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1233  TYPE old_value, new_value; \
1234  temp_val = *lhs; \
1235  old_value = temp_val; \
1236  new_value = rhs OP old_value; \
1237  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1238  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1239  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1240  { \
1241  KMP_DO_PAUSE; \
1242  \
1243  temp_val = *lhs; \
1244  old_value = temp_val; \
1245  new_value = rhs OP old_value; \
1246  } \
1247  }
1248 
1249 // -------------------------------------------------------------------------
1250 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1251 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1252  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1253  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1254 }
1255 
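// ------------------------------------------------------------------------
// For illustration only: the only difference from the forward routines is the
// operand order, so ATOMIC_CMPXCHG_REV( fixed8, sub, ... ) below yields
// __kmpc_atomic_fixed8_sub_rev, whose retry loop computes
//
//   new_value = rhs - old_value;     // x = expr - x
//
// instead of old_value - rhs. This is what a compiler needs for
//   #pragma omp atomic
//   x = expr - x;
// when x is a 64-bit integer.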
1256 // ------------------------------------------------------------------------
1257 // Entry definitions for integer operands
1258 // TYPE_ID - operands type and size (fixed4, float4)
1259 // OP_ID - operation identifier (add, sub, mul, ...)
1260 // TYPE - operand type
1261 // BITS - size in bits, used to distinguish low level calls
1262 // OP - operator (used in critical section)
1263 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1264 
1265 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1266 // ------------------------------------------------------------------------
1267 // Routines for ATOMIC integer operands, other operators
1268 // ------------------------------------------------------------------------
1269 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1270 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1271 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1272 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1273 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1274 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1275 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1276 
1277 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1278 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1279 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1280 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1281 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1282 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1283 
1284 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1285 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1286 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1287 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1288 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1289 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1290 
1291 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1292 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1293 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1294 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1295 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1296 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1297 
1298 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1299 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1300 
1301 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1302 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1303 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1304 
1305 // ------------------------------------------------------------------------
1306 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1307 // TYPE_ID, OP_ID, TYPE - detailed above
1308 // OP - operator
1309 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1310 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1311 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1312  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1313  OP_CRITICAL_REV(OP,LCK_ID) \
1314 }
1315 
1316 /* ------------------------------------------------------------------------- */
1317 // routines for long double type
1318 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1319 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1320 #if KMP_HAVE_QUAD
1321 // routines for _Quad type
1322 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1323 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1324 #if ( KMP_ARCH_X86 )
1325  ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1326  ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1327 #endif
1328 #endif
1329 
1330 // routines for complex types
1331 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1332 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1333 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1334 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1335 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1336 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1337 #if KMP_HAVE_QUAD
1338 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
1339 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1340 #if ( KMP_ARCH_X86 )
1341  ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1342  ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1343 #endif
1344 #endif
1345 
1346 
1347 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1348 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1349 
1350 #endif //OMP_40_ENABLED
1351 
1352 
1353 /* ------------------------------------------------------------------------ */
1354 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1355 /* Note: in order to reduce the total number of type combinations, */
1356 /* it is assumed that the compiler converts the RHS to the longest */
1357 /* floating type, that is _Quad, before calling any of these routines. */
1358 /* The conversion to _Quad is done by the compiler during the calculation, */
1359 /* and the conversion back to TYPE before the assignment, like: */
1360 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1361 /* A performance penalty is expected because of the software emulation. */
1362 /* ------------------------------------------------------------------------ */
1363 
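// ------------------------------------------------------------------------
// For illustration only: for a float LHS updated with a quad-precision RHS,
//   float x;  _Quad q;
//   #pragma omp atomic
//   x += q;
// the compiler is expected to call __kmpc_atomic_float4_add_fp( &loc, gtid,
// &x, q ) (generated below; loc, x, q are placeholder names), and the routine
// effectively performs
//   *lhs = (kmp_real32)( (_Quad)(*lhs) + rhs );
// atomically, paying the cost of the software-emulated _Quad arithmetic.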
1364 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1365 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1366 { \
1367  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1368  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1369 
1370 // -------------------------------------------------------------------------
1371 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1372 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1373  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1374  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1375 }
1376 
1377 // -------------------------------------------------------------------------
1378 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1379 // -------------------------------------------------------------------------
1380 // X86 or X86_64: no alignment problems ====================================
1381 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1382 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1383  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1384  OP_CMPXCHG(TYPE,BITS,OP) \
1385 }
1386 // -------------------------------------------------------------------------
1387 #else
1388 // ------------------------------------------------------------------------
1389 // Code for other architectures that don't handle unaligned accesses.
1390 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1391 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1392  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1393  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1394  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1395  } else { \
1396  KMP_CHECK_GTID; \
1397  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1398  } \
1399 }
1400 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1401 
1402 // RHS=float8
1403 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1404 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1405 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1406 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1407 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1408 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1409 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1410 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1411 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1412 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1413 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1414 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1415 
1416 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1417 #if KMP_HAVE_QUAD
1418 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1419 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1420 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1421 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1422 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1423 
1424 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1425 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1426 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1427 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1428 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1429 
1430 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1431 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1432 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1433 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1434 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1435 
1436 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1437 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1438 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1439 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1440 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1441 
1442 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1443 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1444 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1445 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1446 
1447 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1448 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1449 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1450 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1451 
1452 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1453 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1454 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1455 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
1456 #endif
1457 
1458 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1459 // ------------------------------------------------------------------------
1460 // X86 or X86_64: no alignment problems ====================================
1461 #if USE_CMPXCHG_FIX
1462 // workaround for C78287 (complex(kind=4) data type)
1463 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1464 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1465  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1466  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1467 }
1468 // end of the second part of the workaround for C78287
1469 #else
1470 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1471 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1472  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1473  OP_CMPXCHG(TYPE,BITS,OP) \
1474 }
1475 #endif // USE_CMPXCHG_FIX
1476 #else
1477 // ------------------------------------------------------------------------
1478 // Code for other architectures that don't handle unaligned accesses.
1479 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1480 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1481  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1482  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1483  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1484  } else { \
1485  KMP_CHECK_GTID; \
1486  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1487  } \
1488 }
1489 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1490 
1491 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1492 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1493 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1494 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1495 
1496 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1497 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1498 
1500 // ------------------------------------------------------------------------
1501 // Atomic READ routines
1502 // ------------------------------------------------------------------------
1503 
1504 // ------------------------------------------------------------------------
1505 // Beginning of a definition (provides name, parameters, debug trace)
1506 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
1507 // OP_ID - operation identifier (add, sub, mul, ...)
1508 // TYPE - operands' type
1509 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1510 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1511 { \
1512  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1513  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1514 
1515 // ------------------------------------------------------------------------
1516 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1517 // TYPE - operands' type
1518 // BITS - size in bits, used to distinguish low level calls
1519 // OP - operator
1520 // Note: temp_val introduced in order to force the compiler to read
1521 // *lhs only once (w/o it the compiler reads *lhs twice)
1522 // TODO: check if it is still necessary
1523 // Return old value regardless of the result of the "compare & swap" operation
1524 
1525 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1526  { \
1527  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1528  union f_i_union { \
1529  TYPE f_val; \
1530  kmp_int##BITS i_val; \
1531  }; \
1532  union f_i_union old_value; \
1533  temp_val = *loc; \
1534  old_value.f_val = temp_val; \
1535  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1536  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1537  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1538  new_value = old_value.f_val; \
1539  return new_value; \
1540  }
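// ------------------------------------------------------------------------
// Illustrative sketch (not part of the library): the idea behind
// OP_CMPXCHG_READ is that a compare-and-swap whose "new" value equals its
// "expected" value returns the current contents atomically without changing
// them. A self-contained equivalent for a 64-bit float, using GCC-style
// __sync builtins, could be written as:
//
//   static double atomic_read_real64( volatile double *loc )
//   {
//       union { double f; long long i; } u;
//       u.f = *loc;                                   /* snapshot of current bits */
//       /* CAS with old == new returns the value seen in *loc, unchanged */
//       u.i = __sync_val_compare_and_swap( (volatile long long *)loc, u.i, u.i );
//       return u.f;
//   }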
1541 
1542 // -------------------------------------------------------------------------
1543 // Operation on *lhs, rhs bound by critical section
1544 // OP - operator (it's supposed to contain an assignment)
1545 // LCK_ID - lock identifier
1546 // Note: don't check gtid as it should always be valid
1547 // For 1- and 2-byte operands gtid is expected to be valid; for other sizes it is checked before this macro
1548 #define OP_CRITICAL_READ(OP,LCK_ID) \
1549  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1550  \
1551  new_value = (*loc); \
1552  \
1553  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1554 
1555 // -------------------------------------------------------------------------
1556 #ifdef KMP_GOMP_COMPAT
1557 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1558  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1559  KMP_CHECK_GTID; \
1560  OP_CRITICAL_READ( OP, 0 ); \
1561  return new_value; \
1562  }
1563 #else
1564 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1565 #endif /* KMP_GOMP_COMPAT */
1566 
1567 // -------------------------------------------------------------------------
1568 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1569 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1570  TYPE new_value; \
1571  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1572  new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1573  return new_value; \
1574 }
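// ------------------------------------------------------------------------
// Illustrative sketch (not part of the library): ATOMIC_FIXED_READ performs
// an atomic read of an integer by atomically adding zero, which returns the
// current value without modifying it. A self-contained equivalent using a
// GCC-style builtin:
//
//   static int atomic_read_int32( volatile int *loc )
//   {
//       return __sync_fetch_and_add( loc, 0 );   /* read via fetch-and-add of 0 */
//   }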
1575 // -------------------------------------------------------------------------
1576 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1577 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1578  TYPE new_value; \
1579  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1580  OP_CMPXCHG_READ(TYPE,BITS,OP) \
1581 }
1582 // ------------------------------------------------------------------------
1583 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1584 // TYPE_ID, OP_ID, TYPE - detailed above
1585 // OP - operator
1586 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1587 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1588 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1589  TYPE new_value; \
1590  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1591  OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1592  return new_value; \
1593 }
1594 
1595 // ------------------------------------------------------------------------
1596 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1597 // Let's return the read value through the additional parameter.
1598 
1599 #if ( KMP_OS_WINDOWS )
1600 
1601 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1602  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1603  \
1604  (*out) = (*loc); \
1605  \
1606  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1607 // ------------------------------------------------------------------------
1608 #ifdef KMP_GOMP_COMPAT
1609 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1610  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1611  KMP_CHECK_GTID; \
1612  OP_CRITICAL_READ_WRK( OP, 0 ); \
1613  }
1614 #else
1615 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1616 #endif /* KMP_GOMP_COMPAT */
1617 // ------------------------------------------------------------------------
1618 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1619 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1620 { \
1621  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1622  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1623 
1624 // ------------------------------------------------------------------------
1625 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1626 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1627  OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1628  OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1629 }
1630 
1631 #endif // KMP_OS_WINDOWS
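// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not the compiler's exact lowering): on
// Windows* OS the cmplx4 read routine built by ATOMIC_CRITICAL_READ_WRK
// delivers its result through the extra "out" parameter instead of a return
// value, so a call would look roughly like
//
//   kmp_cmplx32 v;
//   __kmpc_atomic_cmplx4_rd( &v, &loc_info, gtid, &x );   /* v receives x atomically */
//
// where "loc_info", "gtid" and "x" are assumed to be supplied by the caller.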
1632 
1633 // ------------------------------------------------------------------------
1634 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1635 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1636 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1637 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1638 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1639 
1640 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1641 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1642 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1643 
1644 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1645 #if KMP_HAVE_QUAD
1646 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1647 #endif // KMP_HAVE_QUAD
1648 
1649 // Fix for CQ220361 on Windows* OS
1650 #if ( KMP_OS_WINDOWS )
1651  ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1652 #else
1653  ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1654 #endif
1655 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1656 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1657 #if KMP_HAVE_QUAD
1658 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1659 #if ( KMP_ARCH_X86 )
1660  ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1661  ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1662 #endif
1663 #endif
1664 
1665 
1666 // ------------------------------------------------------------------------
1667 // Atomic WRITE routines
1668 // ------------------------------------------------------------------------
1669 
1670 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1671 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1672  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1673  KMP_XCHG_FIXED##BITS( lhs, rhs ); \
1674 }
1675 // ------------------------------------------------------------------------
1676 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1677 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1678  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1679  KMP_XCHG_REAL##BITS( lhs, rhs ); \
1680 }
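// ------------------------------------------------------------------------
// Illustrative sketch (not part of the library, and not the KMP_XCHG_REAL*
// implementation itself): an atomic write of a 32-bit float via an exchange,
// which is the technique ATOMIC_XCHG_FLOAT_WR relies on. With GCC-style
// builtins (where __sync_lock_test_and_set is a full atomic exchange on x86):
//
//   static void atomic_write_real32( volatile float *lhs, float rhs )
//   {
//       union { float f; int i; } u;
//       u.f = rhs;
//       (void)__sync_lock_test_and_set( (volatile int *)lhs, u.i );   /* atomic xchg */
//   }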
1681 
1682 
1683 // ------------------------------------------------------------------------
1684 // Operation on *lhs, rhs using "compare_and_store" routine
1685 // TYPE - operands' type
1686 // BITS - size in bits, used to distinguish low level calls
1687 // OP - operator
1688 // Note: temp_val introduced in order to force the compiler to read
1689 // *lhs only once (w/o it the compiler reads *lhs twice)
1690 #define OP_CMPXCHG_WR(TYPE,BITS,OP) \
1691  { \
1692  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1693  TYPE old_value, new_value; \
1694  temp_val = *lhs; \
1695  old_value = temp_val; \
1696  new_value = rhs; \
1697  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1698  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1699  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1700  { \
1701  KMP_CPU_PAUSE(); \
1702  \
1703  temp_val = *lhs; \
1704  old_value = temp_val; \
1705  new_value = rhs; \
1706  } \
1707  }
1708 
1709 // -------------------------------------------------------------------------
1710 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1711 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1712  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1713  OP_CMPXCHG_WR(TYPE,BITS,OP) \
1714 }
1715 
1716 // ------------------------------------------------------------------------
1717 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1718 // TYPE_ID, OP_ID, TYPE - detailed above
1719 // OP - operator
1720 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1721 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1722 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1723  OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1724  OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1725 }
1726 // -------------------------------------------------------------------------
1727 
1728 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1729 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1730 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1731 #if ( KMP_ARCH_X86 )
1732  ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1733 #else
1734  ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1735 #endif
1736 
1737 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1738 #if ( KMP_ARCH_X86 )
1739  ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1740 #else
1741  ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1742 #endif
1743 
1744 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1745 #if KMP_HAVE_QUAD
1746 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1747 #endif
1748 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1749 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1750 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1751 #if KMP_HAVE_QUAD
1752 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1753 #if ( KMP_ARCH_X86 )
1754  ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1755  ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1756 #endif
1757 #endif
1758 
1759 
1760 // ------------------------------------------------------------------------
1761 // Atomic CAPTURE routines
1762 // ------------------------------------------------------------------------
1763 
1764 // Beginning of a definition (provides name, parameters, debug trace)
1765 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned fixed types)
1766 // OP_ID - operation identifier (add, sub, mul, ...)
1767 // TYPE - operands' type
1768 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
1769 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1770 { \
1771  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1772  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1773 
1774 // -------------------------------------------------------------------------
1775 // Operation on *lhs, rhs bound by critical section
1776 // OP - operator (it's supposed to contain an assignment)
1777 // LCK_ID - lock identifier
1778 // Note: don't check gtid as it should always be valid
1779 // For 1- and 2-byte operands gtid is expected to be valid; for other sizes it is checked before this macro
1780 #define OP_CRITICAL_CPT(OP,LCK_ID) \
1781  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1782  \
1783  if( flag ) { \
1784  (*lhs) OP rhs; \
1785  new_value = (*lhs); \
1786  } else { \
1787  new_value = (*lhs); \
1788  (*lhs) OP rhs; \
1789  } \
1790  \
1791  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1792  return new_value;
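// ------------------------------------------------------------------------
// The "flag" parameter selects which value is captured. Roughly, for the two
// OpenMP capture forms (sketch, not the compiler's exact lowering):
//
//   v = x += expr;           /* flag != 0: capture the NEW value of x */
//   { v = x; x += expr; }    /* flag == 0: capture the OLD value of x */
//
// so OP_CRITICAL_CPT either updates *lhs first and then records it, or
// records *lhs first and then updates it.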
1793 
1794 // ------------------------------------------------------------------------
1795 #ifdef KMP_GOMP_COMPAT
1796 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
1797  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1798  KMP_CHECK_GTID; \
1799  OP_CRITICAL_CPT( OP##=, 0 ); \
1800  }
1801 #else
1802 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1803 #endif /* KMP_GOMP_COMPAT */
1804 
1805 // ------------------------------------------------------------------------
1806 // Operation on *lhs, rhs using "compare_and_store" routine
1807 // TYPE - operands' type
1808 // BITS - size in bits, used to distinguish low level calls
1809 // OP - operator
1810 // Note: temp_val introduced in order to force the compiler to read
1811 // *lhs only once (w/o it the compiler reads *lhs twice)
1812 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1813  { \
1814  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1815  TYPE old_value, new_value; \
1816  temp_val = *lhs; \
1817  old_value = temp_val; \
1818  new_value = old_value OP rhs; \
1819  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1820  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1821  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1822  { \
1823  KMP_CPU_PAUSE(); \
1824  \
1825  temp_val = *lhs; \
1826  old_value = temp_val; \
1827  new_value = old_value OP rhs; \
1828  } \
1829  if( flag ) { \
1830  return new_value; \
1831  } else \
1832  return old_value; \
1833  }
1834 
1835 // -------------------------------------------------------------------------
1836 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1837 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1838  TYPE new_value; \
1839  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1840  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1841 }
1842 
1843 // -------------------------------------------------------------------------
1844 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1845 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1846  TYPE old_value, new_value; \
1847  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1848  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1849  old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
1850  if( flag ) { \
1851  return old_value OP rhs; \
1852  } else \
1853  return old_value; \
1854 }
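// ------------------------------------------------------------------------
// Illustrative sketch (not part of the library): ATOMIC_FIXED_ADD_CPT uses an
// atomic fetch-and-add and reconstructs the "new" value only when the caller
// asked for it. A self-contained equivalent for 32-bit integers, using a
// GCC-style builtin:
//
//   static int atomic_add_capture_int32( volatile int *lhs, int rhs, int flag )
//   {
//       int old_value = __sync_fetch_and_add( lhs, rhs );   /* returns prior value */
//       return flag ? old_value + rhs : old_value;          /* new vs. old capture */
//   }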
1855 // -------------------------------------------------------------------------
1856 
1857 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1858 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1859 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1860 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1861 
1862 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1863 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1864 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1865 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1866 
1867 // ------------------------------------------------------------------------
1868 // Entries definition for integer operands
1869 // TYPE_ID - operands type and size (fixed4, float4)
1870 // OP_ID - operation identifier (add, sub, mul, ...)
1871 // TYPE - operand type
1872 // BITS - size in bits, used to distinguish low level calls
1873 // OP - operator (used in critical section)
1874 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1875 // ------------------------------------------------------------------------
1876 // Routines for ATOMIC integer operands, other operators
1877 // ------------------------------------------------------------------------
1878 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1879 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1880 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1881 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1882 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1883 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1884 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1885 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1886 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1887 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1888 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1889 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1890 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1891 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1892 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1893 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1894 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1895 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1896 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1897 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1898 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1899 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1900 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1901 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1902 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1903 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1904 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1905 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1906 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1907 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1908 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1909 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1910 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1911 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1912 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1913 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1914 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1915 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1916 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1917 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1918 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1919 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1920 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1921 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1922 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1923 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1924 
1925 // ------------------------------------------------------------------------
1926 // Routines for C/C++ Reduction operators && and ||
1927 // ------------------------------------------------------------------------
1928 
1929 // -------------------------------------------------------------------------
1930 // Operation on *lhs, rhs bound by critical section
1931 // OP - operator (it's supposed to contain an assignment)
1932 // LCK_ID - lock identifier
1933 // Note: don't check gtid as it should always be valid
1934 // For 1- and 2-byte operands gtid is expected to be valid; for other sizes it is checked before this macro
1935 #define OP_CRITICAL_L_CPT(OP,LCK_ID) \
1936  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1937  \
1938  if( flag ) { \
1939  new_value OP rhs; \
1940  } else \
1941  new_value = (*lhs); \
1942  \
1943  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1944 
1945 // ------------------------------------------------------------------------
1946 #ifdef KMP_GOMP_COMPAT
1947 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
1948  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1949  KMP_CHECK_GTID; \
1950  OP_CRITICAL_L_CPT( OP, 0 ); \
1951  return new_value; \
1952  }
1953 #else
1954 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
1955 #endif /* KMP_GOMP_COMPAT */
1956 
1957 // ------------------------------------------------------------------------
1958 // Need separate macros for && and || because C has no combined-assignment forms of these operators
1959 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1960 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1961  TYPE new_value; \
1962  OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
1963  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1964 }
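// ------------------------------------------------------------------------
// Note on the GOMP-compatibility path above: the macro argument "= *lhs OP"
// is pasted in front of "rhs" inside OP_CRITICAL_L_CPT, so for the && case
// the critical section executes (sketch of the expansion)
//
//   new_value = *lhs && rhs;
//
// which is why no combined-assignment operator is needed.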
1965 
1966 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
1967 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
1968 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
1969 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
1970 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
1971 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
1972 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
1973 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
1974 
1975 
1976 // -------------------------------------------------------------------------
1977 // Routines for Fortran operators that have no C counterpart:
1978 // MAX, MIN, .EQV., .NEQV.
1979 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
1980 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
1981 // -------------------------------------------------------------------------
1982 
1983 // -------------------------------------------------------------------------
1984 // MIN and MAX need separate macros
1985 // OP - comparison operator used to check whether any action is still needed
1986 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
1987  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1988  \
1989  if ( *lhs OP rhs ) { /* still need actions? */ \
1990  old_value = *lhs; \
1991  *lhs = rhs; \
1992  if ( flag ) \
1993  new_value = rhs; \
1994  else \
1995  new_value = old_value; \
1996  } \
1997  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1998  return new_value; \
1999 
2000 // -------------------------------------------------------------------------
2001 #ifdef KMP_GOMP_COMPAT
2002 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
2003  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
2004  KMP_CHECK_GTID; \
2005  MIN_MAX_CRITSECT_CPT( OP, 0 ); \
2006  }
2007 #else
2008 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
2009 #endif /* KMP_GOMP_COMPAT */
2010 
2011 // -------------------------------------------------------------------------
2012 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2013  { \
2014  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2015  /*TYPE old_value; */ \
2016  temp_val = *lhs; \
2017  old_value = temp_val; \
2018  while ( old_value OP rhs && /* still need actions? */ \
2019  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2020  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2021  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2022  { \
2023  KMP_CPU_PAUSE(); \
2024  temp_val = *lhs; \
2025  old_value = temp_val; \
2026  } \
2027  if( flag ) \
2028  return rhs; \
2029  else \
2030  return old_value; \
2031  }
2032 
2033 // -------------------------------------------------------------------------
2034 // 1-byte, 2-byte operands - use critical section
2035 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2036 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2037  TYPE new_value, old_value; \
2038  if ( *lhs OP rhs ) { /* need actions? */ \
2039  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2040  MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2041  } \
2042  return *lhs; \
2043 }
2044 
2045 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2046 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2047  TYPE new_value, old_value; \
2048  if ( *lhs OP rhs ) { \
2049  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2050  MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2051  } \
2052  return *lhs; \
2053 }
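// ------------------------------------------------------------------------
// Illustrative sketch (not part of the library): the MIN_MAX_CMPXCHG_CPT
// pattern retries the compare-and-swap only while the stored value still
// needs replacing. A self-contained atomic "max with capture" for 32-bit
// integers, using GCC-style builtins:
//
//   static int atomic_max_capture_int32( volatile int *lhs, int rhs, int flag )
//   {
//       int old_value = *lhs;
//       while ( old_value < rhs &&                           /* still need to replace? */
//               ! __sync_bool_compare_and_swap( lhs, old_value, rhs ) )
//       {
//           old_value = *lhs;                                /* lost the race: re-read */
//       }
//       /* flag selects new-value capture (the max) vs. old-value capture */
//       return flag ? ( old_value < rhs ? rhs : old_value ) : old_value;
//   }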
2054 
2055 
2056 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2057 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2058 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2059 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2060 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2061 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2062 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2063 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2064 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2065 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2066 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2067 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2068 #if KMP_HAVE_QUAD
2069 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2070 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2071 #if ( KMP_ARCH_X86 )
2072  MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2073  MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2074 #endif
2075 #endif
2076 
2077 // ------------------------------------------------------------------------
2078 #ifdef KMP_GOMP_COMPAT
2079 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
2080  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2081  KMP_CHECK_GTID; \
2082  OP_CRITICAL_CPT( OP, 0 ); \
2083  }
2084 #else
2085 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2086 #endif /* KMP_GOMP_COMPAT */
2087 // ------------------------------------------------------------------------
2088 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2089 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2090  TYPE new_value; \
2091  OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
2092  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2093 }
2094 
2095 // ------------------------------------------------------------------------
2096 
2097 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2098 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2099 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2100 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2101 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2102 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2103 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2104 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
2105 
2106 // ------------------------------------------------------------------------
2107 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2108 // TYPE_ID, OP_ID, TYPE - detailed above
2109 // OP - operator
2110 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2111 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2112 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2113  TYPE new_value; \
2114  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2115  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2116 }
2117 
2118 // ------------------------------------------------------------------------
2119 
2120 // Workaround for cmplx4. Regular routines with return value don't work
2121 // on Win_32e. Let's return captured values through the additional parameter.
2122 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
2123  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2124  \
2125  if( flag ) { \
2126  (*lhs) OP rhs; \
2127  (*out) = (*lhs); \
2128  } else { \
2129  (*out) = (*lhs); \
2130  (*lhs) OP rhs; \
2131  } \
2132  \
2133  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2134  return;
2135 // ------------------------------------------------------------------------
2136 
2137 #ifdef KMP_GOMP_COMPAT
2138 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
2139  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2140  KMP_CHECK_GTID; \
2141  OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
2142  }
2143 #else
2144 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2145 #endif /* KMP_GOMP_COMPAT */
2146 // ------------------------------------------------------------------------
2147 
2148 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2149 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2150 { \
2151  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2152  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2153 // ------------------------------------------------------------------------
2154 
2155 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2156 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2157  OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
2158  OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
2159 }
2160 // The end of workaround for cmplx4
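// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not the compiler's exact lowering): the
// cmplx4 capture routines built with ATOMIC_CRITICAL_CPT_WRK return the
// captured value through the extra "out" parameter, e.g.
//
//   kmp_cmplx32 v;
//   __kmpc_atomic_cmplx4_add_cpt( &loc_info, gtid, &x, rhs, &v, 1 );  /* v = x += rhs */
//
// where "loc_info", "gtid", "x" and "rhs" are assumed to be supplied by the
// caller, and the final argument selects new-value (1) vs. old-value (0)
// capture.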
2161 
2162 /* ------------------------------------------------------------------------- */
2163 // routines for long double type
2164 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2165 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2166 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2167 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2168 #if KMP_HAVE_QUAD
2169 // routines for _Quad type
2170 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2171 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2172 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2173 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2174 #if ( KMP_ARCH_X86 )
2175  ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2176  ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2177  ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2178  ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2179 #endif
2180 #endif
2181 
2182 // routines for complex types
2183 
2184 // cmplx4 routines to return void
2185 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2186 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2187 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2188 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2189 
2190 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2191 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2192 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2193 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2194 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2195 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2196 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2197 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2198 #if KMP_HAVE_QUAD
2199 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2200 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2201 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2202 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2203 #if ( KMP_ARCH_X86 )
2204  ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2205  ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2206  ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2207  ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2208 #endif
2209 #endif
2210 
2211 #if OMP_40_ENABLED
2212 
2213 // OpenMP 4.0, capture with reversed operands (for non-commutative operations): v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; }
2214 // Supported only on IA-32 architecture and Intel(R) 64
2215 
2216 // -------------------------------------------------------------------------
2217 // Operation on *lhs, rhs bound by critical section
2218 // OP - operator (it's supposed to contain an assignment)
2219 // LCK_ID - lock identifier
2220 // Note: don't check gtid as it should always be valid
2221 // For 1- and 2-byte operands gtid is expected to be valid; for other sizes it is checked before this macro
2222 #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2223  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2224  \
2225  if( flag ) { \
2226  /*temp_val = (*lhs);*/\
2227  (*lhs) = (rhs) OP (*lhs); \
2228  new_value = (*lhs); \
2229  } else { \
2230  new_value = (*lhs);\
2231  (*lhs) = (rhs) OP (*lhs); \
2232  } \
2233  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2234  return new_value;
2235 
2236 // ------------------------------------------------------------------------
2237 #ifdef KMP_GOMP_COMPAT
2238 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
2239  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2240  KMP_CHECK_GTID; \
2241  OP_CRITICAL_CPT_REV( OP, 0 ); \
2242  }
2243 #else
2244 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
2245 #endif /* KMP_GOMP_COMPAT */
2246 
2247 // ------------------------------------------------------------------------
2248 // Operation on *lhs, rhs using "compare_and_store" routine
2249 // TYPE - operands' type
2250 // BITS - size in bits, used to distinguish low level calls
2251 // OP - operator
2252 // Note: temp_val introduced in order to force the compiler to read
2253 // *lhs only once (w/o it the compiler reads *lhs twice)
2254 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2255  { \
2256  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2257  TYPE old_value, new_value; \
2258  temp_val = *lhs; \
2259  old_value = temp_val; \
2260  new_value = rhs OP old_value; \
2261  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2262  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2263  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2264  { \
2265  KMP_CPU_PAUSE(); \
2266  \
2267  temp_val = *lhs; \
2268  old_value = temp_val; \
2269  new_value = rhs OP old_value; \
2270  } \
2271  if( flag ) { \
2272  return new_value; \
2273  } else \
2274  return old_value; \
2275  }
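// ------------------------------------------------------------------------
// Note: the *_cpt_rev routines implement the reversed form x = expr binop x
// (e.g. from "v = x = expr - x"), so the loop above computes
// "new_value = rhs OP old_value" rather than "old_value OP rhs". A minimal
// self-contained sketch (not part of the library) of a reversed subtract
// with capture on 32-bit integers, using GCC-style builtins:
//
//   static int atomic_sub_rev_capture_int32( volatile int *lhs, int rhs, int flag )
//   {
//       int old_value, new_value;
//       do {
//           old_value = *lhs;
//           new_value = rhs - old_value;               /* reversed operand order */
//       } while ( ! __sync_bool_compare_and_swap( lhs, old_value, new_value ) );
//       return flag ? new_value : old_value;
//   }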
2276 
2277 // -------------------------------------------------------------------------
2278 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2279 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2280  TYPE new_value; \
2281  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2282  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2283  OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2284 }
2285 
2286 
2287 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
2288 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
2289 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
2290 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
2291 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
2292 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
2293 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
2294 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
2295 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
2296 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
2297 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
2298 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
2299 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
2300 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
2301 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
2302 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
2303 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
2304 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
2305 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
2306 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
2307 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
2308 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
2309 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
2310 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
2311 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
2312 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
2313 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
2314 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
2315 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2316 
2317 
2318 // ------------------------------------------------------------------------
2319 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2320 // TYPE_ID, OP_ID, TYPE - detailed above
2321 // OP - operator
2322 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2323 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2324 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2325  TYPE new_value; \
2326  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2327  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
2328  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2329  OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2330 }
2331 
2332 
2333 /* ------------------------------------------------------------------------- */
2334 // routines for long double type
2335 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
2336 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
2337 #if KMP_HAVE_QUAD
2338 // routines for _Quad type
2339 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
2340 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
2341 #if ( KMP_ARCH_X86 )
2342  ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
2343  ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
2344 #endif
2345 #endif
2346 
2347 // routines for complex types
2348 
2349 // ------------------------------------------------------------------------
2350 
2351 // Workaround for cmplx4. Regular routines with return value don't work
2352 // on Win_32e. Let's return captured values through the additional parameter.
2353 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2354  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2355  \
2356  if( flag ) { \
2357  (*lhs) = (rhs) OP (*lhs); \
2358  (*out) = (*lhs); \
2359  } else { \
2360  (*out) = (*lhs); \
2361  (*lhs) = (rhs) OP (*lhs); \
2362  } \
2363  \
2364  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2365  return;
2366 // ------------------------------------------------------------------------
2367 
2368 #ifdef KMP_GOMP_COMPAT
2369 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
2370  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2371  KMP_CHECK_GTID; \
2372  OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
2373  }
2374 #else
2375 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
2376 #endif /* KMP_GOMP_COMPAT */
2377 // ------------------------------------------------------------------------
2378 
2379 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2380 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2381  OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
2382  OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2383 }
2384 // The end of workaround for cmplx4
2385 
2386 
2387 // !!! TODO: check if we need to return void for cmplx4 routines
2388 // cmplx4 routines to return void
2389 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
2390 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
2391 
2392 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
2393 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
2394 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
2395 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
2396 #if KMP_HAVE_QUAD
2397 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
2398 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
2399 #if ( KMP_ARCH_X86 )
2400  ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2401  ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2402 #endif
2403 #endif
2404 
2405 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
2406 
2407 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2408 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
2409 { \
2410  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2411  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2412 
2413 #define CRITICAL_SWP(LCK_ID) \
2414  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2415  \
2416  old_value = (*lhs); \
2417  (*lhs) = rhs; \
2418  \
2419  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2420  return old_value;
2421 
2422 // ------------------------------------------------------------------------
2423 #ifdef KMP_GOMP_COMPAT
2424 #define GOMP_CRITICAL_SWP(FLAG) \
2425  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2426  KMP_CHECK_GTID; \
2427  CRITICAL_SWP( 0 ); \
2428  }
2429 #else
2430 #define GOMP_CRITICAL_SWP(FLAG)
2431 #endif /* KMP_GOMP_COMPAT */
2432 
2433 
2434 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2435 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2436  TYPE old_value; \
2437  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2438  old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
2439  return old_value; \
2440 }
2441 // ------------------------------------------------------------------------
2442 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2443 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2444  TYPE old_value; \
2445  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2446  old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
2447  return old_value; \
2448 }
2449 
2450 // ------------------------------------------------------------------------
2451 #define CMPXCHG_SWP(TYPE,BITS) \
2452  { \
2453  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2454  TYPE old_value, new_value; \
2455  temp_val = *lhs; \
2456  old_value = temp_val; \
2457  new_value = rhs; \
2458  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2459  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2460  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2461  { \
2462  KMP_CPU_PAUSE(); \
2463  \
2464  temp_val = *lhs; \
2465  old_value = temp_val; \
2466  new_value = rhs; \
2467  } \
2468  return old_value; \
2469  }
2470 
2471 // -------------------------------------------------------------------------
2472 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2473 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2474  TYPE old_value; \
2475  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2476  CMPXCHG_SWP(TYPE,BITS) \
2477 }
2478 
2479 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
2480 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
2481 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
2482 
2483 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
2484 
2485 #if ( KMP_ARCH_X86 )
2486  ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2487  ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2488 #else
2489  ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2490  ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2491 #endif
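// ------------------------------------------------------------------------
// Hypothetical caller-side sketch (not the compiler's exact lowering): the
// capture-write form { v = x; x = expr; } maps onto the *_swp routines,
// which return the previous contents of x, e.g.
//
//   kmp_int32 v = __kmpc_atomic_fixed4_swp( &loc_info, gtid, &x, expr );
//
// where "loc_info", "gtid", "x" and "expr" are assumed to be supplied by the
// caller.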
2492 
2493 // ------------------------------------------------------------------------
2494 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2495 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
2496 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2497  TYPE old_value; \
2498  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2499  CRITICAL_SWP(LCK_ID) \
2500 }
2501 
2502 // ------------------------------------------------------------------------
2503 
2504 // !!! TODO: check if we need to return void for cmplx4 routines
2505 // Workaround for cmplx4. Regular routines with return value don't work
2506 // on Win_32e. Let's return captured values through the additional parameter.
2507 
2508 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2509 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
2510 { \
2511  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2512  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2513 
2514 
2515 #define CRITICAL_SWP_WRK(LCK_ID) \
2516  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2517  \
2518  tmp = (*lhs); \
2519  (*lhs) = (rhs); \
2520  (*out) = tmp; \
2521  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2522  return;
2523 
2524 // ------------------------------------------------------------------------
2525 
2526 #ifdef KMP_GOMP_COMPAT
2527 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
2528  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2529  KMP_CHECK_GTID; \
2530  CRITICAL_SWP_WRK( 0 ); \
2531  }
2532 #else
2533 #define GOMP_CRITICAL_SWP_WRK(FLAG)
2534 #endif /* KMP_GOMP_COMPAT */
2535 // ------------------------------------------------------------------------
2536 
2537 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
2538 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2539  TYPE tmp; \
2540  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
2541  CRITICAL_SWP_WRK(LCK_ID) \
2542 }
2543 // The end of workaround for cmplx4
2544 
2545 
2546 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
2547 #if KMP_HAVE_QUAD
2548 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
2549 #endif
2550 // cmplx4 routine to return void
2551 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2552 
2553 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2554 
2555 
2556 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
2557 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
2558 #if KMP_HAVE_QUAD
2559 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
2560 #if ( KMP_ARCH_X86 )
2561  ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
2562  ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
2563 #endif
2564 #endif
2565 
2566 
2567 // End of OpenMP 4.0 Capture
2568 
2569 #endif //OMP_40_ENABLED
2570 
2571 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2572 
2573 
2574 #undef OP_CRITICAL
2575 
2576 /* ------------------------------------------------------------------------ */
2577 /* Generic atomic routines */
2578 /* ------------------------------------------------------------------------ */
2579 
2580 void
2581 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2582 {
2583  KMP_DEBUG_ASSERT( __kmp_init_serial );
2584 
2585  if (
2586 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2587  FALSE /* must use lock */
2588 #else
2589  TRUE
2590 #endif
2591  )
2592  {
2593  kmp_int8 old_value, new_value;
2594 
2595  old_value = *(kmp_int8 *) lhs;
2596  (*f)( &new_value, &old_value, rhs );
2597 
2598  /* TODO: Should this be acquire or release? */
2599  while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2600  *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2601  {
2602  KMP_CPU_PAUSE();
2603 
2604  old_value = *(kmp_int8 *) lhs;
2605  (*f)( &new_value, &old_value, rhs );
2606  }
2607 
2608  return;
2609  }
2610  else {
2611  //
2612  // All 1-byte data is of integer data type.
2613  //
2614 
2615 #ifdef KMP_GOMP_COMPAT
2616  if ( __kmp_atomic_mode == 2 ) {
2617  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2618  }
2619  else
2620 #endif /* KMP_GOMP_COMPAT */
2621  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2622 
2623  (*f)( lhs, lhs, rhs );
2624 
2625 #ifdef KMP_GOMP_COMPAT
2626  if ( __kmp_atomic_mode == 2 ) {
2627  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2628  }
2629  else
2630 #endif /* KMP_GOMP_COMPAT */
2631  __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2632  }
2633 }
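// ------------------------------------------------------------------------
// Hypothetical usage sketch (names invented for illustration): the generic
// routines take a callback f( result, a, b ) that computes the operation on
// *a and *b into *result, while the library supplies only the atomicity.
// For a user-coded 1-byte operation a compiler could emit something like
//
//   static void my_op_1( void *result, void *a, void *b )
//   {
//       *(char *)result = (char)( *(char *)a ^ *(char *)b );   /* example operation */
//   }
//   ...
//   __kmpc_atomic_1( &loc_info, gtid, &x, &y, my_op_1 );        /* x = x ^ y, atomically */
//
// where "loc_info", "gtid", "x" and "y" are assumed to come from the caller.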
2634 
2635 void
2636 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2637 {
2638  if (
2639 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2640  FALSE /* must use lock */
2641 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2642  TRUE /* no alignment problems */
2643 #else
2644  ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
2645 #endif
2646  )
2647  {
2648  kmp_int16 old_value, new_value;
2649 
2650  old_value = *(kmp_int16 *) lhs;
2651  (*f)( &new_value, &old_value, rhs );
2652 
2653  /* TODO: Should this be acquire or release? */
2654  while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2655  *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2656  {
2657  KMP_CPU_PAUSE();
2658 
2659  old_value = *(kmp_int16 *) lhs;
2660  (*f)( &new_value, &old_value, rhs );
2661  }
2662 
2663  return;
2664  }
2665  else {
2666  //
2667  // All 2-byte data is of integer data type.
2668  //
2669 
2670 #ifdef KMP_GOMP_COMPAT
2671  if ( __kmp_atomic_mode == 2 ) {
2672  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2673  }
2674  else
2675 #endif /* KMP_GOMP_COMPAT */
2676  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2677 
2678  (*f)( lhs, lhs, rhs );
2679 
2680 #ifdef KMP_GOMP_COMPAT
2681  if ( __kmp_atomic_mode == 2 ) {
2682  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2683  }
2684  else
2685 #endif /* KMP_GOMP_COMPAT */
2686  __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2687  }
2688 }
2689 
2690 void
2691 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2692 {
2693  KMP_DEBUG_ASSERT( __kmp_init_serial );
2694 
2695  if (
2696  //
2697  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
2698  // GOMP compatibility is broken if this routine is called for floats.
2699  //
2700 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2701  TRUE /* no alignment problems */
2702 #else
2703  ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
2704 #endif
2705  )
2706  {
2707  kmp_int32 old_value, new_value;
2708 
2709  old_value = *(kmp_int32 *) lhs;
2710  (*f)( &new_value, &old_value, rhs );
2711 
2712  /* TODO: Should this be acquire or release? */
2713  while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2714  *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2715  {
2716  KMP_CPU_PAUSE();
2717 
2718  old_value = *(kmp_int32 *) lhs;
2719  (*f)( &new_value, &old_value, rhs );
2720  }
2721 
2722  return;
2723  }
2724  else {
2725  //
2726  // Use __kmp_atomic_lock_4i for all 4-byte data,
2727  // even if it isn't of integer data type.
2728  //
2729 
2730 #ifdef KMP_GOMP_COMPAT
2731  if ( __kmp_atomic_mode == 2 ) {
2732  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2733  }
2734  else
2735 #endif /* KMP_GOMP_COMPAT */
2736  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2737 
2738  (*f)( lhs, lhs, rhs );
2739 
2740 #ifdef KMP_GOMP_COMPAT
2741  if ( __kmp_atomic_mode == 2 ) {
2742  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2743  }
2744  else
2745 #endif /* KMP_GOMP_COMPAT */
2746  __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2747  }
2748 }
2749 
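/*
 * Conceptual restatement of the retry loop above (illustration only, not
 * library code): KMP_COMPARE_AND_STORE_ACQ32 behaves like a 32-bit
 * compare-and-swap, so the lock-free path is the classic read / compute /
 * CAS-retry idiom.  In portable C11 the same pattern would look roughly
 * like the sketch below; the library itself uses its own KMP_* primitives,
 * not <stdatomic.h>, and the function name here is hypothetical.
 */
#if 0   /* example only -- excluded from compilation */
#include <stdint.h>
#include <stdatomic.h>

static void example_cas_update_32( _Atomic int32_t *lhs, int32_t rhs,
                                   int32_t (*op)( int32_t, int32_t ) )
{
    int32_t old_value = atomic_load( lhs );
    int32_t new_value = op( old_value, rhs );

    /* On failure the CAS refreshes old_value with the current contents of
       *lhs, so only the new value has to be recomputed before retrying.   */
    while ( ! atomic_compare_exchange_weak( lhs, &old_value, new_value ) ) {
        new_value = op( old_value, rhs );
    }
}
#endif
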
2750 void
2751 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2752 {
2753  KMP_DEBUG_ASSERT( __kmp_init_serial );
2754  if (
2755 
2756 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2757  FALSE /* must use lock */
2758 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2759  TRUE /* no alignment problems */
2760 #else
2761  ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
2762 #endif
2763  )
2764  {
2765  kmp_int64 old_value, new_value;
2766 
2767  old_value = *(kmp_int64 *) lhs;
2768  (*f)( &new_value, &old_value, rhs );
2769  /* TODO: Should this be acquire or release? */
2770  while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2771  *(kmp_int64 *) &old_value,
2772  *(kmp_int64 *) &new_value ) )
2773  {
2774  KMP_CPU_PAUSE();
2775 
2776  old_value = *(kmp_int64 *) lhs;
2777  (*f)( &new_value, &old_value, rhs );
2778  }
2779 
2780  return;
2781  } else {
2782  //
2783  // Use __kmp_atomic_lock_8i for all 8-byte data,
2784  // even if it isn't of integer data type.
2785  //
2786 
2787 #ifdef KMP_GOMP_COMPAT
2788  if ( __kmp_atomic_mode == 2 ) {
2789  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2790  }
2791  else
2792 #endif /* KMP_GOMP_COMPAT */
2793  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2794 
2795  (*f)( lhs, lhs, rhs );
2796 
2797 #ifdef KMP_GOMP_COMPAT
2798  if ( __kmp_atomic_mode == 2 ) {
2799  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2800  }
2801  else
2802 #endif /* KMP_GOMP_COMPAT */
2803  __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2804  }
2805 }
2806 
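/*
 * The remaining generic entry points (10-, 16-, 20- and 32-byte operands)
 * have no compare-and-store primitive of matching width, so they always
 * serialize: on the global __kmp_atomic_lock under GOMP compatibility
 * (__kmp_atomic_mode == 2), otherwise on the per-size lock
 * (__kmp_atomic_lock_10r, _16c, _20c, _32c).
 */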
2807 void
2808 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2809 {
2810  KMP_DEBUG_ASSERT( __kmp_init_serial );
2811 
2812 #ifdef KMP_GOMP_COMPAT
2813  if ( __kmp_atomic_mode == 2 ) {
2814  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2815  }
2816  else
2817 #endif /* KMP_GOMP_COMPAT */
2818  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2819 
2820  (*f)( lhs, lhs, rhs );
2821 
2822 #ifdef KMP_GOMP_COMPAT
2823  if ( __kmp_atomic_mode == 2 ) {
2824  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2825  }
2826  else
2827 #endif /* KMP_GOMP_COMPAT */
2828  __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2829 }
2830 
2831 void
2832 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2833 {
2834  KMP_DEBUG_ASSERT( __kmp_init_serial );
2835 
2836 #ifdef KMP_GOMP_COMPAT
2837  if ( __kmp_atomic_mode == 2 ) {
2838  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2839  }
2840  else
2841 #endif /* KMP_GOMP_COMPAT */
2842  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2843 
2844  (*f)( lhs, lhs, rhs );
2845 
2846 #ifdef KMP_GOMP_COMPAT
2847  if ( __kmp_atomic_mode == 2 ) {
2848  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2849  }
2850  else
2851 #endif /* KMP_GOMP_COMPAT */
2852  __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2853 }
2854 
2855 void
2856 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2857 {
2858  KMP_DEBUG_ASSERT( __kmp_init_serial );
2859 
2860 #ifdef KMP_GOMP_COMPAT
2861  if ( __kmp_atomic_mode == 2 ) {
2862  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2863  }
2864  else
2865 #endif /* KMP_GOMP_COMPAT */
2866  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2867 
2868  (*f)( lhs, lhs, rhs );
2869 
2870 #ifdef KMP_GOMP_COMPAT
2871  if ( __kmp_atomic_mode == 2 ) {
2872  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2873  }
2874  else
2875 #endif /* KMP_GOMP_COMPAT */
2876  __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2877 }
2878 
2879 void
2880 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2881 {
2882  KMP_DEBUG_ASSERT( __kmp_init_serial );
2883 
2884 #ifdef KMP_GOMP_COMPAT
2885  if ( __kmp_atomic_mode == 2 ) {
2886  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2887  }
2888  else
2889 #endif /* KMP_GOMP_COMPAT */
2890  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2891 
2892  (*f)( lhs, lhs, rhs );
2893 
2894 #ifdef KMP_GOMP_COMPAT
2895  if ( __kmp_atomic_mode == 2 ) {
2896  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2897  }
2898  else
2899 #endif /* KMP_GOMP_COMPAT */
2900  __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2901 }
2902 
2903 // AC: these are the same two routines as GOMP_atomic_start/end, but they are called by our compiler;
2904 // duplicated so that third-party names are not used in pure Intel code.
2905 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
2906 void
2907 __kmpc_atomic_start(void)
2908 {
2909  int gtid = __kmp_entry_gtid();
2910  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
2911  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
2912 }
2913 
2914 
2915 void
2916 __kmpc_atomic_end(void)
2917 {
2918  int gtid = __kmp_get_gtid();
2919  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
2920  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
2921 }
2922 
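/*
 * Illustrative sketch (not part of the library source): a compiler that
 * cannot lower "#pragma omp atomic" to a hardware primitive or to one of
 * the typed __kmpc_atomic_* entry points can bracket the update with
 * __kmpc_atomic_start()/__kmpc_atomic_end(), which serialize on the global
 * __kmp_atomic_lock.  The wrapper name and operands below are hypothetical.
 */
#if 0   /* example only -- excluded from compilation */
static void example_user_coded_atomic( long double *x, long double y )
{
    /* expansion of:   #pragma omp atomic
     *                 *x = *x * 3.0L + y;       (assuming no typed handler) */
    __kmpc_atomic_start();
    *x = *x * 3.0L + y;
    __kmpc_atomic_end();
}
#endif
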
2923 /* ------------------------------------------------------------------------ */
2924 /* ------------------------------------------------------------------------ */
2929 // end of file