/* Intel(R) OpenMP* Runtime Library -- kmp_itt.h
   (Doxygen page header removed; it was not part of the original source.) */
1 #if USE_ITT_BUILD
2 /*
3  * kmp_itt.h -- ITT Notify interface.
4  * $Revision: 43457 $
5  * $Date: 2014-09-17 03:57:22 -0500 (Wed, 17 Sep 2014) $
6  */
7 
8 /* <copyright>
9  Copyright (c) 1997-2014 Intel Corporation. All Rights Reserved.
10 
11  Redistribution and use in source and binary forms, with or without
12  modification, are permitted provided that the following conditions
13  are met:
14 
15  * Redistributions of source code must retain the above copyright
16  notice, this list of conditions and the following disclaimer.
17  * Redistributions in binary form must reproduce the above copyright
18  notice, this list of conditions and the following disclaimer in the
19  documentation and/or other materials provided with the distribution.
20  * Neither the name of Intel Corporation nor the names of its
21  contributors may be used to endorse or promote products derived
22  from this software without specific prior written permission.
23 
24  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 
36 </copyright> */
37 
#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

// In debug builds the __kmp_itt_* routines below are ordinary external
// functions (kmp_itt.inl is NOT included at the bottom of this header);
// in release builds they are static inline and their definitions come from
// kmp_itt.inl.
#if KMP_DEBUG
    #define __kmp_inline                  // Turn off inlining in debug mode.
#else
    #define __kmp_inline static inline
#endif
52 
#if USE_ITT_NOTIFY
    // Number of spin-loop iterations to wait before reporting an ITT
    // "prepare" event; consulted by KMP_FSYNC_SPIN_PREPARE/ACQUIRED below.
    extern kmp_int32 __kmp_itt_prepare_delay;
# ifdef __cplusplus
    extern "C" void __kmp_itt_fini_ittlib(void);
# else
    extern void __kmp_itt_fini_ittlib(void);
# endif
#endif
61 
// Simplify the handling of an argument that is only required when
// USE_ITT_BUILD is enabled: expands to ", x" here; the !USE_ITT_BUILD branch
// at the end of this file defines it to expand to nothing.
#define USE_ITT_BUILD_ARG(x) ,x

void __kmp_itt_initialize();
void __kmp_itt_destroy();
67 
68 // -------------------------------------------------------------------------------------------------
69 // New stuff for reporting high-level constructs.
70 // -------------------------------------------------------------------------------------------------
71 
// Note the naming convention:
//   __kmp_itt_xxxing() function should be called before action, while
//   __kmp_itt_xxxed()  function should be called after action.

// --- Parallel region reporting ---
// Master only, before forking threads.
__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized = 0 );
// Master only, after joining threads.
// (*) Note: A thread may execute tasks after this point, though.
__kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 );

// --- Frame reporting ---
// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel
__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 );

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - if this is a reduction barrier.
__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction );
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
// iterations - loop trip count, chunk - chunk size.
__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk );
__kmp_inline void __kmp_itt_metadata_single();

// --- Barrier reporting ---
__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object );

// --- Taskwait reporting ---
__kmp_inline void * __kmp_itt_taskwait_object( int gtid );
__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object );

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting( void * object );
__kmp_inline void __kmp_itt_task_finished( void * object );

// --- Lock reporting ---
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );

// --- Critical reporting ---
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock );

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start( int gtid );
__kmp_inline void __kmp_itt_single_end( int gtid );

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init( int gtid );
__kmp_inline void __kmp_itt_ordered_prep( int gtid );
__kmp_inline void __kmp_itt_ordered_start( int gtid );
__kmp_inline void __kmp_itt_ordered_end( int gtid );

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name( int gtid );

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name );

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
144 
145 // -------------------------------------------------------------------------------------------------
146 // Old stuff for reporting low-level internal synchronization.
147 // -------------------------------------------------------------------------------------------------
148 
#if USE_ITT_NOTIFY

    /*
     * Support for SSC marks, which are used by SDE
     * http://software.intel.com/en-us/articles/intel-software-development-emulator
     * to mark points in instruction traces that represent spin-loops and are
     * therefore uninteresting when collecting traces for architecture simulation.
     */
    #ifndef INCLUDE_SSC_MARKS
    # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
    #endif

    /* Linux 64 only for now */
    #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
    // Portable (at least for gcc and icc) code to insert the necessary
    // instructions to set %ebx and execute the unlikely no-op.
    #if defined( __INTEL_COMPILER )
    # define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
    #else
    // movl places the (compile-time constant) tag in %ebx; the 0x64,0x67,0x90
    // byte sequence is the no-op recognized by SDE. %ebx is declared clobbered.
    # define INSERT_SSC_MARK(tag) \
    __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx")
    #endif
    #else
    # define INSERT_SSC_MARK(tag) ((void)0)
    #endif

    /* Markers for the start and end of regions that represent polling and
     * are therefore uninteresting to architectural simulations. 0x4376 and
     * 0x4377 are arbitrary numbers that should be unique in the space of
     * SSC tags; there is no central issuing authority, rather
     * randomness is expected to work.
     */
    #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
    #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)

    // Markers for architecture simulation.
    // FORKING      : Before the master thread forks.
    // JOINING      : At the start of the join.
    // INVOKING     : Before the threads invoke microtasks.
    // DISPATCH_INIT: At the start of dynamically scheduled loop.
    // DISPATCH_NEXT: After claiming next iteration of dynamically scheduled loop.
    #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
    #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
    #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
    #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
    #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
195 
    // The object is an address that associates a specific set of the prepare,
    // acquire, release, and cancel operations.

    /* Sync prepare indicates a thread is going to start waiting for another thread
       to send a release event. This operation should be done just before the thread
       begins checking for the existence of the release event. */

    /* Sync cancel indicates a thread is cancelling a wait on another thread and
       continuing execution without waiting for the other thread to release it. */

    /* Sync acquired indicates a thread has received a release event from another
       thread and has stopped waiting. This operation must occur only after the
       release event is received. */

    /* Sync release indicates a thread is going to send a release event to another
       thread so it will stop waiting and continue execution. This operation must
       happen just before the release event. */

    #define KMP_FSYNC_PREPARE( obj ) __itt_fsync_prepare( (void *)( obj ) )
    #define KMP_FSYNC_CANCEL( obj ) __itt_fsync_cancel( (void *)( obj ) )
    #define KMP_FSYNC_ACQUIRED( obj ) __itt_fsync_acquired( (void *)( obj ) )
    #define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) )

    /*
       In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be
       called with a delay (and not called at all if waiting time is small).
       So, in spin loops, do not use KMP_FSYNC_PREPARE(), but use
       KMP_FSYNC_SPIN_INIT() (before spin loop), KMP_FSYNC_SPIN_PREPARE()
       (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED().
       See KMP_WAIT_YIELD() for example.
    */
226 
    // NOTE: KMP_FSYNC_SPIN_INIT deliberately declares `sync_iters` in the
    // caller's scope (no do/while wrapper); KMP_FSYNC_SPIN_PREPARE and
    // KMP_FSYNC_SPIN_ACQUIRED below reference that same variable, so all
    // three must be used together within one enclosing block.
    #undef KMP_FSYNC_SPIN_INIT
    #define KMP_FSYNC_SPIN_INIT( obj, spin ) \
        int sync_iters = 0; \
        if ( __itt_fsync_prepare_ptr ) { \
            if ( obj == NULL ) { \
                obj = spin; \
            } /* if */ \
        } /* if */ \
        SSC_MARK_SPIN_START()

    // Count spin iterations; emit the "prepare" event exactly once, after
    // __kmp_itt_prepare_delay iterations (short waits are never reported).
    #undef KMP_FSYNC_SPIN_PREPARE
    #define KMP_FSYNC_SPIN_PREPARE( obj ) do { \
        if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \
            ++ sync_iters; \
            if ( sync_iters >= __kmp_itt_prepare_delay ) { \
                KMP_FSYNC_PREPARE( (void*) obj ); \
            } /* if */ \
        } /* if */ \
    } while (0)
    // Report "acquired" only if the matching "prepare" was reported above.
    #undef KMP_FSYNC_SPIN_ACQUIRED
    #define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \
        SSC_MARK_SPIN_END(); \
        if ( sync_iters >= __kmp_itt_prepare_delay ) { \
            KMP_FSYNC_ACQUIRED( (void*) obj ); \
        } /* if */ \
    } while (0)
253 
    /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
           KMP_ITT_IGNORE(
               ptr = malloc( size );
           );
    */
    // Saves ITT state, switches object mode to "ignore" around `statement`,
    // then restores the saved state (only when ITT is actually loaded, i.e.
    // __itt_state_get_ptr is non-NULL).
    #define KMP_ITT_IGNORE( statement ) do { \
        __itt_state_t __itt_state_; \
        if ( __itt_state_get_ptr ) { \
            __itt_state_ = __itt_state_get(); \
            __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \
        } /* if */ \
        { statement } \
        if ( __itt_state_get_ptr ) { \
            __itt_state_set( __itt_state_ ); \
        } /* if */ \
    } while (0)

    const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to
                                           // different OpenMP regions in the user source code).
    extern kmp_int32 __kmp_barrier_domain_count;
    extern kmp_int32 __kmp_region_domain_count;
    extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
    extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
    extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
    extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
    extern __itt_domain * metadata_domain;
280 
#else

// Null definitions of the synchronization tracing functions, so callers need
// no #if USE_ITT_NOTIFY guards.
# define KMP_FSYNC_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_CANCEL( obj ) ((void)0)
# define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
# define KMP_FSYNC_RELEASING( obj ) ((void)0)

# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)

// The statement is still executed, just without the ITT ignore-state wrapper.
# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)

#endif // USE_ITT_NOTIFY
296 
#if ! KMP_DEBUG
    // In release mode include definitions of inline functions
    // (declared above with __kmp_inline == static inline).
    #include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H
303 
#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
# define KMP_FSYNC_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_CANCEL( obj ) ((void)0)
# define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
# define KMP_FSYNC_RELEASING( obj ) ((void)0)

# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)

// The statement is still executed, just without any ITT bracketing.
# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)

// Extra-argument helper expands to nothing when ITT support is compiled out.
# define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */
/* Doxygen cross-reference residue (not part of the original source):
   ident_t    -- Definition: kmp.h:218
   sched_type -- Definition: kmp.h:320 */