Intel® OpenMP* Runtime Library
kmp_gsupport.c
1 /*
2  * kmp_gsupport.c
3  * $Revision: 42181 $
4  * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #if defined(__x86_64)
38 # define KMP_I8
39 #endif
40 #include "kmp.h"
41 #include "kmp_atomic.h"
42 
43 #ifdef __cplusplus
44  extern "C" {
45 #endif // __cplusplus
46 
47 #define MKLOC(loc,routine) \
48  static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" };
49 
50 
51 void
52 GOMP_barrier(void)
53 {
54  int gtid = __kmp_entry_gtid();
55  MKLOC(loc, "GOMP_barrier");
56  KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
57  __kmpc_barrier(&loc, gtid);
58 }
59 
60 
61 /* */
62 //
63 // Mutual exclusion
64 //
65 
66 //
67 // The symbol that icc/ifort generates for unnamed critical
68 // sections - .gomp_critical_user_ - is defined using .comm in any objects
69 // that reference it. We can't reference it directly here in C code, as the
70 // symbol contains a ".".
71 //
72 // The RTL contains an assembly language definition of .gomp_critical_user_
73 // with another symbol __kmp_unnamed_critical_addr initialized with its
74 // address.
75 //
76 extern kmp_critical_name *__kmp_unnamed_critical_addr;
77 
78 
79 void
80 GOMP_critical_start(void)
81 {
82  int gtid = __kmp_entry_gtid();
83  MKLOC(loc, "GOMP_critical_start");
84  KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
85  __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
86 }
87 
88 
89 void
90 GOMP_critical_end(void)
91 {
92  int gtid = __kmp_get_gtid();
93  MKLOC(loc, "GOMP_critical_end");
94  KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
95  __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
96 }
97 
98 
99 void
100 GOMP_critical_name_start(void **pptr)
101 {
102  int gtid = __kmp_entry_gtid();
103  MKLOC(loc, "GOMP_critical_name_start");
104  KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
105  __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
106 }
107 
108 
109 void
110 GOMP_critical_name_end(void **pptr)
111 {
112  int gtid = __kmp_get_gtid();
113  MKLOC(loc, "GOMP_critical_name_end");
114  KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
115  __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
116 }
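
//
// Illustrative sketch -- not part of the runtime. This is roughly how gcc
// drives the four entry points above for unnamed and named critical
// constructs. The names example_counter, example_name_lock and
// __gomp_example_critical are hypothetical; gcc actually emits a per-name
// ".gomp_critical_user_<name>" symbol rather than a local static pointer.
//
#if 0
static int example_counter;
static void *example_name_lock;

static void
__gomp_example_critical(void)
{
    // #pragma omp critical
    GOMP_critical_start();      // unnamed: uses __kmp_unnamed_critical_addr
    example_counter++;
    GOMP_critical_end();

    // #pragma omp critical (example_name)
    GOMP_critical_name_start(&example_name_lock);
    example_counter++;
    GOMP_critical_name_end(&example_name_lock);
}
#endif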
117 
118 
119 //
120 // The Gnu codegen tries to use locked operations to perform atomic updates
121 // inline. If it can't, then it calls GOMP_atomic_start() before performing
122 // the update and GOMP_atomic_end() afterward, regardless of the data type.
123 //
124 
125 void
126 GOMP_atomic_start(void)
127 {
128  int gtid = __kmp_entry_gtid();
129  KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
130  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
131 }
132 
133 
134 void
135 GOMP_atomic_end(void)
136 {
137  int gtid = __kmp_get_gtid();
138  KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid));
139  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
140 }
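
//
// Illustrative sketch -- not part of the runtime. When gcc cannot perform an
// atomic update with a single locked instruction (e.g. an x87 long double on
// IA-32), it brackets the plain update with the two calls above. The names
// example_sum and __gomp_example_atomic are hypothetical.
//
#if 0
static long double example_sum;

static void
__gomp_example_atomic(long double v)
{
    // #pragma omp atomic
    // example_sum += v;
    GOMP_atomic_start();        // acquire the global __kmp_atomic_lock
    example_sum += v;
    GOMP_atomic_end();          // release it
}
#endif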
141 
142 
143 int
144 GOMP_single_start(void)
145 {
146  int gtid = __kmp_entry_gtid();
147  MKLOC(loc, "GOMP_single_start");
148  KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid));
149 
150  if (! TCR_4(__kmp_init_parallel))
151  __kmp_parallel_initialize();
152 
153  //
154  // 3rd parameter == FALSE prevents kmp_enter_single from pushing a
155  // workshare when USE_CHECKS is defined. We need to avoid the push,
156  // as there is no corresponding GOMP_single_end() call.
157  //
158  return __kmp_enter_single(gtid, &loc, FALSE);
159 }
160 
161 
162 void *
163 GOMP_single_copy_start(void)
164 {
165  void *retval;
166  int gtid = __kmp_entry_gtid();
167  MKLOC(loc, "GOMP_single_copy_start");
168  KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid));
169 
170  if (! TCR_4(__kmp_init_parallel))
171  __kmp_parallel_initialize();
172 
173  //
174  // If this is the first thread to enter, return NULL. The generated
175  // code will then call GOMP_single_copy_end() for this thread only,
176  // with the copyprivate data pointer as an argument.
177  //
178  if (__kmp_enter_single(gtid, &loc, FALSE))
179  return NULL;
180 
181  //
182  // Wait for the first thread to set the copyprivate data pointer,
183  // and for all other threads to reach this point.
184  //
185  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
186 
187  //
188  // Retrieve the value of the copyprivate data pointer, and wait for all
189  // threads to do likewise, then return.
190  //
191  retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
192  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
193  return retval;
194 }
195 
196 
197 void
198 GOMP_single_copy_end(void *data)
199 {
200  int gtid = __kmp_get_gtid();
201  MKLOC(loc, "GOMP_single_copy_end");
202  KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid));
203 
204  //
205  // Set the copyprivate data pointer for the team, then hit the barrier
206  // so that the other threads will continue on and read it. Hit another
207  // barrier before continuing, so that we know that the copyprivate
208  // data pointer has been propagated to all threads before trying to
209  // reuse the t_copypriv_data field.
210  //
211  __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
212  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
213  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
214 }
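
//
// Illustrative sketch -- not part of the runtime. Roughly the pattern gcc
// emits for "#pragma omp single copyprivate(x)", driving the protocol
// described above. The name __gomp_example_single_copy is hypothetical; real
// codegen publishes the address of a block holding all copyprivate variables.
//
#if 0
static void
__gomp_example_single_copy(void)
{
    int x;
    void *p = GOMP_single_copy_start();
    if (p == NULL) {
        // This thread executes the single region, then publishes its data.
        x = 42;
        GOMP_single_copy_end(&x);
    }
    else {
        // All other threads copy the value published by the executing thread.
        x = *(int *)p;
    }
}
#endif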
215 
216 
217 void
218 GOMP_ordered_start(void)
219 {
220  int gtid = __kmp_entry_gtid();
221  MKLOC(loc, "GOMP_ordered_start");
222  KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
223  __kmpc_ordered(&loc, gtid);
224 }
225 
226 
227 void
228 GOMP_ordered_end(void)
229 {
230  int gtid = __kmp_get_gtid();
231  MKLOC(loc, "GOMP_ordered_end");
232  KA_TRACE(20, ("GOMP_ordered_end: T#%d\n", gtid));
233  __kmpc_end_ordered(&loc, gtid);
234 }
235 
236 
237 /* */
238 //
239 // Dispatch macro defs
240 //
241 // They come in two flavors: 64-bit unsigned, and either 32-bit signed
242 // (IA-32 architecture) or 64-bit signed (Intel(R) 64).
243 //
244 
245 #if KMP_ARCH_X86
246 # define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
247 # define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
248 # define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
249 #else
250 # define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
251 # define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
252 # define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
253 #endif /* KMP_ARCH_X86 */
254 
255 # define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
256 # define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
257 # define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u
258 
259 
260 /* */
261 //
262 // The parallel construct
263 //
264 
265 #ifdef KMP_DEBUG
266 static
267 #endif /* KMP_DEBUG */
268 void
269 __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),
270  void *data)
271 {
272  task(data);
273 }
274 
275 
276 #ifdef KMP_DEBUG
277 static
278 #endif /* KMP_DEBUG */
279 void
280 __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
281  void (*task)(void *), void *data, unsigned num_threads, ident_t *loc,
282  enum sched_type schedule, long start, long end, long incr, long chunk_size)
283 {
284  //
285  // Initialize the loop worksharing construct.
286  //
287  KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
288  schedule != kmp_sch_static);
289 
290  //
291  // Now invoke the microtask.
292  //
293  task(data);
294 }
295 
296 
297 #ifdef KMP_DEBUG
298 static
299 #endif /* KMP_DEBUG */
300 void
301 __kmp_GOMP_fork_call(ident_t *loc, int gtid, microtask_t wrapper, int argc,...)
302 {
303  int rc;
304 
305  va_list ap;
306  va_start(ap, argc);
307 
308  rc = __kmp_fork_call(loc, gtid, FALSE, argc, wrapper, __kmp_invoke_task_func,
309 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
310  &ap
311 #else
312  ap
313 #endif
314  );
315 
316  va_end(ap);
317 
318  if (rc) {
319  kmp_info_t *thr = __kmp_threads[gtid];
320  __kmp_run_before_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
321  thr->th.th_team);
322  }
323 }
324 
325 
326 void
327 GOMP_parallel_start(void (*task)(void *), void *data, unsigned num_threads)
328 {
329  int gtid = __kmp_entry_gtid();
330  MKLOC(loc, "GOMP_parallel_start");
331  KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
332 
333  if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
334  if (num_threads != 0) {
335  __kmp_push_num_threads(&loc, gtid, num_threads);
336  }
337  __kmp_GOMP_fork_call(&loc, gtid,
338  (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data);
339  }
340  else {
341  __kmpc_serialized_parallel(&loc, gtid);
342  }
343 }
344 
345 
346 void
347 GOMP_parallel_end(void)
348 {
349  int gtid = __kmp_get_gtid();
350  MKLOC(loc, "GOMP_parallel_end");
351  KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));
352 
353  if (! __kmp_threads[gtid]->th.th_team->t.t_serialized) {
354  kmp_info_t *thr = __kmp_threads[gtid];
355  __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
356  thr->th.th_team);
357  __kmp_join_call(&loc, gtid);
358  }
359  else {
360  __kmpc_end_serialized_parallel(&loc, gtid);
361  }
362 }
363 
364 
365 /* */
366 //
367 // Loop worksharing constructs
368 //
369 
370 //
371 // The Gnu codegen passes in an exclusive upper bound for the overall range,
372 // but the libguide dispatch code expects an inclusive upper bound, hence the
373 // "end - incr" 5th argument to KMP_DISPATCH_INIT (and the "ub - str" 11th
374 // argument to __kmp_GOMP_fork_call).
375 //
376 // Conversely, KMP_DISPATCH_NEXT returns an inclusive upper bound in *p_ub,
377 // but the Gnu codegen expects an exclusive upper bound, so the adjustment
378 // "*p_ub += stride" compensates for the discrepancy.
379 //
380 // Correction: the gnu codegen always adjusts the upper bound by +-1, not the
381 // stride value. We adjust the dispatch parameters accordingly (by +-1), but
382 // we still adjust p_ub by the actual stride value.
383 //
384 // The "runtime" versions do not take a chunk_sz parameter.
385 //
386 // The profile lib cannot support construct checking of unordered loops that
387 // are predetermined by the compiler to be statically scheduled, as the gcc
388 // codegen will not always emit calls to GOMP_loop_static_next() to get the
389 // next iteration. Instead, it emits inline code to call omp_get_thread_num()
390 // num and calculate the iteration space using the result. It doesn't do this
391 // and calculate the iteration space using the result. It doesn't do this
392 // with ordered static loops, so they can be checked.
393 
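//
// Illustrative sketch -- not part of the runtime. A worked example of the
// bound adjustment described above, using the driver pattern gcc emits for
//     #pragma omp for schedule(dynamic, 2)
//     for (i = 0; i < 10; i++) ...
// gcc passes lb = 0, ub = 10 (exclusive), str = 1, chunk_sz = 2; the start
// routine hands the dispatcher the inclusive bound 9 (ub - 1), and each chunk
// it hands back, e.g. [0, 1] inclusive, becomes [0, 2) again via "*p_ub += 1".
// The name __gomp_example_dynamic_loop is hypothetical.
//
#if 0
static void
__gomp_example_dynamic_loop(void)
{
    long lb, ub;
    if (GOMP_loop_dynamic_start(0, 10, 1, 2, &lb, &ub)) {
        do {
            long i;
            for (i = lb; i < ub; i++) {
                /* loop body */
            }
        } while (GOMP_loop_dynamic_next(&lb, &ub));
    }
    GOMP_loop_end();    // all threads meet at the barrier here
}
#endif
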
394 #define LOOP_START(func,schedule) \
395  int func (long lb, long ub, long str, long chunk_sz, long *p_lb, \
396  long *p_ub) \
397  { \
398  int status; \
399  long stride; \
400  int gtid = __kmp_entry_gtid(); \
401  MKLOC(loc, #func); \
402  KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
403  gtid, lb, ub, str, chunk_sz )); \
404  \
405  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
406  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
407  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
408  (schedule) != kmp_sch_static); \
409  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
410  (kmp_int *)p_ub, (kmp_int *)&stride); \
411  if (status) { \
412  KMP_DEBUG_ASSERT(stride == str); \
413  *p_ub += (str > 0) ? 1 : -1; \
414  } \
415  } \
416  else { \
417  status = 0; \
418  } \
419  \
420  KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
421  gtid, *p_lb, *p_ub, status)); \
422  return status; \
423  }
424 
425 
426 #define LOOP_RUNTIME_START(func,schedule) \
427  int func (long lb, long ub, long str, long *p_lb, long *p_ub) \
428  { \
429  int status; \
430  long stride; \
431  long chunk_sz = 0; \
432  int gtid = __kmp_entry_gtid(); \
433  MKLOC(loc, #func); \
434  KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \
435  gtid, lb, ub, str, chunk_sz )); \
436  \
437  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
438  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
439  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
440  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
441  (kmp_int *)p_ub, (kmp_int *)&stride); \
442  if (status) { \
443  KMP_DEBUG_ASSERT(stride == str); \
444  *p_ub += (str > 0) ? 1 : -1; \
445  } \
446  } \
447  else { \
448  status = 0; \
449  } \
450  \
451  KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
452  gtid, *p_lb, *p_ub, status)); \
453  return status; \
454  }
455 
456 
457 #define LOOP_NEXT(func,fini_code) \
458  int func(long *p_lb, long *p_ub) \
459  { \
460  int status; \
461  long stride; \
462  int gtid = __kmp_get_gtid(); \
463  MKLOC(loc, #func); \
464  KA_TRACE(20, ( #func ": T#%d\n", gtid)); \
465  \
466  fini_code \
467  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
468  (kmp_int *)p_ub, (kmp_int *)&stride); \
469  if (status) { \
470  *p_ub += (stride > 0) ? 1 : -1; \
471  } \
472  \
473  KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \
474  "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \
475  return status; \
476  }
477 
478 
479 LOOP_START(GOMP_loop_static_start, kmp_sch_static)
480 LOOP_NEXT(GOMP_loop_static_next, {})
481 LOOP_START(GOMP_loop_dynamic_start, kmp_sch_dynamic_chunked)
482 LOOP_NEXT(GOMP_loop_dynamic_next, {})
483 LOOP_START(GOMP_loop_guided_start, kmp_sch_guided_chunked)
484 LOOP_NEXT(GOMP_loop_guided_next, {})
485 LOOP_RUNTIME_START(GOMP_loop_runtime_start, kmp_sch_runtime)
486 LOOP_NEXT(GOMP_loop_runtime_next, {})
487 
488 LOOP_START(GOMP_loop_ordered_static_start, kmp_ord_static)
489 LOOP_NEXT(GOMP_loop_ordered_static_next, \
490  { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
491 LOOP_START(GOMP_loop_ordered_dynamic_start, kmp_ord_dynamic_chunked)
492 LOOP_NEXT(GOMP_loop_ordered_dynamic_next, \
493  { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
494 LOOP_START(GOMP_loop_ordered_guided_start, kmp_ord_guided_chunked)
495 LOOP_NEXT(GOMP_loop_ordered_guided_next, \
496  { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
497 LOOP_RUNTIME_START(GOMP_loop_ordered_runtime_start, kmp_ord_runtime)
498 LOOP_NEXT(GOMP_loop_ordered_runtime_next, \
499  { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
500 
501 
502 void
503 GOMP_loop_end(void)
504 {
505  int gtid = __kmp_get_gtid();
506  KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
507 
508  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
509 
510  KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
511 }
512 
513 
514 void
515 GOMP_loop_end_nowait(void)
516 {
517  KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
518 }
519 
520 
521 /* */
522 //
523 // Unsigned long long loop worksharing constructs
524 //
525 // These are new with gcc 4.4
526 //
527 
528 #define LOOP_START_ULL(func,schedule) \
529  int func (int up, unsigned long long lb, unsigned long long ub, \
530  unsigned long long str, unsigned long long chunk_sz, \
531  unsigned long long *p_lb, unsigned long long *p_ub) \
532  { \
533  int status; \
534  long long str2 = up ? ((long long)str) : -((long long)str); \
535  long long stride; \
536  int gtid = __kmp_entry_gtid(); \
537  MKLOC(loc, #func); \
538  \
539  KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \
540  gtid, up, lb, ub, str, chunk_sz )); \
541  \
542  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
543  KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
544  (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \
545  (schedule) != kmp_sch_static); \
546  status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \
547  (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
548  if (status) { \
549  KMP_DEBUG_ASSERT(stride == str2); \
550  *p_ub += (str > 0) ? 1 : -1; \
551  } \
552  } \
553  else { \
554  status = 0; \
555  } \
556  \
557  KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
558  gtid, *p_lb, *p_ub, status)); \
559  return status; \
560  }
561 
562 
563 #define LOOP_RUNTIME_START_ULL(func,schedule) \
564  int func (int up, unsigned long long lb, unsigned long long ub, \
565  unsigned long long str, unsigned long long *p_lb, \
566  unsigned long long *p_ub) \
567  { \
568  int status; \
569  long long str2 = up ? ((long long)str) : -((long long)str); \
570  unsigned long long stride; \
571  unsigned long long chunk_sz = 0; \
572  int gtid = __kmp_entry_gtid(); \
573  MKLOC(loc, #func); \
574  \
575  KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \
576  gtid, up, lb, ub, str, chunk_sz )); \
577  \
578  if ((str > 0) ? (lb < ub) : (lb > ub)) { \
579  KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
580  (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, TRUE); \
581  status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \
582  (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
583  if (status) { \
584  KMP_DEBUG_ASSERT(stride == str2); \
585  *p_ub += (str > 0) ? 1 : -1; \
586  } \
587  } \
588  else { \
589  status = 0; \
590  } \
591  \
592  KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
593  gtid, *p_lb, *p_ub, status)); \
594  return status; \
595  }
596 
597 
598 #define LOOP_NEXT_ULL(func,fini_code) \
599  int func(unsigned long long *p_lb, unsigned long long *p_ub) \
600  { \
601  int status; \
602  long long stride; \
603  int gtid = __kmp_get_gtid(); \
604  MKLOC(loc, #func); \
605  KA_TRACE(20, ( #func ": T#%d\n", gtid)); \
606  \
607  fini_code \
608  status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \
609  (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
610  if (status) { \
611  *p_ub += (stride > 0) ? 1 : -1; \
612  } \
613  \
614  KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \
615  "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \
616  return status; \
617  }
618 
619 
620 LOOP_START_ULL(GOMP_loop_ull_static_start, kmp_sch_static)
621 LOOP_NEXT_ULL(GOMP_loop_ull_static_next, {})
622 LOOP_START_ULL(GOMP_loop_ull_dynamic_start, kmp_sch_dynamic_chunked)
623 LOOP_NEXT_ULL(GOMP_loop_ull_dynamic_next, {})
624 LOOP_START_ULL(GOMP_loop_ull_guided_start, kmp_sch_guided_chunked)
625 LOOP_NEXT_ULL(GOMP_loop_ull_guided_next, {})
626 LOOP_RUNTIME_START_ULL(GOMP_loop_ull_runtime_start, kmp_sch_runtime)
627 LOOP_NEXT_ULL(GOMP_loop_ull_runtime_next, {})
628 
629 LOOP_START_ULL(GOMP_loop_ull_ordered_static_start, kmp_ord_static)
630 LOOP_NEXT_ULL(GOMP_loop_ull_ordered_static_next, \
631  { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
632 LOOP_START_ULL(GOMP_loop_ull_ordered_dynamic_start, kmp_ord_dynamic_chunked)
633 LOOP_NEXT_ULL(GOMP_loop_ull_ordered_dynamic_next, \
634  { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
635 LOOP_START_ULL(GOMP_loop_ull_ordered_guided_start, kmp_ord_guided_chunked)
636 LOOP_NEXT_ULL(GOMP_loop_ull_ordered_guided_next, \
637  { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
638 LOOP_RUNTIME_START_ULL(GOMP_loop_ull_ordered_runtime_start, kmp_ord_runtime)
639 LOOP_NEXT_ULL(GOMP_loop_ull_ordered_runtime_next, \
640  { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
641 
642 
643 /* */
644 //
645 // Combined parallel / loop worksharing constructs
646 //
647 // There are no ull versions (yet).
648 //
649 
650 #define PARALLEL_LOOP_START(func, schedule) \
651  void func (void (*task) (void *), void *data, unsigned num_threads, \
652  long lb, long ub, long str, long chunk_sz) \
653  { \
654  int gtid = __kmp_entry_gtid(); \
655  int last = FALSE; \
656  MKLOC(loc, #func); \
657  KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
658  gtid, lb, ub, str, chunk_sz )); \
659  \
660  if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
661  if (num_threads != 0) { \
662  __kmp_push_num_threads(&loc, gtid, num_threads); \
663  } \
664  __kmp_GOMP_fork_call(&loc, gtid, \
665  (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \
666  task, data, num_threads, &loc, (schedule), lb, \
667  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
668  } \
669  else { \
670  __kmpc_serialized_parallel(&loc, gtid); \
671  } \
672  \
673  KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
674  (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
675  (schedule) != kmp_sch_static); \
676  \
677  KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \
678  }
679 
680 
681 PARALLEL_LOOP_START(GOMP_parallel_loop_static_start, kmp_sch_static)
682 PARALLEL_LOOP_START(GOMP_parallel_loop_dynamic_start, kmp_sch_dynamic_chunked)
683 PARALLEL_LOOP_START(GOMP_parallel_loop_guided_start, kmp_sch_guided_chunked)
684 PARALLEL_LOOP_START(GOMP_parallel_loop_runtime_start, kmp_sch_runtime)
685 
686 
687 #if OMP_30_ENABLED
688 
689 
690 /* */
691 //
692 // Tasking constructs
693 //
694 
695 void
696 GOMP_task(void (*func)(void *), void *data, void (*copy_func)(void *, void *),
697  long arg_size, long arg_align, int if_cond, unsigned gomp_flags)
698 {
699  MKLOC(loc, "GOMP_task");
700  int gtid = __kmp_entry_gtid();
701  kmp_int32 flags = 0;
702  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
703 
704  KA_TRACE(20, ("GOMP_task: T#%d\n", gtid));
705 
706  // The low-order bit is the "tied" flag
707  if (gomp_flags & 1) {
708  input_flags->tiedness = 1;
709  }
710  input_flags->native = 1;
711  // __kmp_task_alloc() sets up all other flags
712 
713  if (! if_cond) {
714  arg_size = 0;
715  }
716 
717  kmp_task_t *task = __kmp_task_alloc(&loc, gtid, input_flags,
718  sizeof(kmp_task_t), arg_size ? arg_size + arg_align - 1 : 0,
719  (kmp_routine_entry_t)func);
720 
721  if (arg_size > 0) {
722  if (arg_align > 0) {
723  task->shareds = (void *)((((size_t)task->shareds)
724  + arg_align - 1) / arg_align * arg_align);
725  }
726  //else error??
727 
728  if (copy_func) {
729  (*copy_func)(task->shareds, data);
730  }
731  else {
732  memcpy(task->shareds, data, arg_size);
733  }
734  }
735 
736  if (if_cond) {
737  __kmpc_omp_task(&loc, gtid, task);
738  }
739  else {
740  __kmpc_omp_task_begin_if0(&loc, gtid, task);
741  func(data);
742  __kmpc_omp_task_complete_if0(&loc, gtid, task);
743  }
744 
745  KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
746 }
747 
748 
749 void
750 GOMP_taskwait(void)
751 {
752  MKLOC(loc, "GOMP_taskwait");
753  int gtid = __kmp_entry_gtid();
754 
755  KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
756 
757  __kmpc_omp_taskwait(&loc, gtid);
758 
759  KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
760 }
761 
762 
763 #endif /* OMP_30_ENABLED */
764 
765 
766 /* */
767 //
768 // Sections worksharing constructs
769 //
770 
771 //
772 // For the sections construct, we initialize a dynamically scheduled loop
773 // worksharing construct with lb 1 and stride 1, and use the iteration #'s
774 // that it returns as section ids.
775 //
776 // There are no special entry points for ordered sections, so we always use
777 // the dynamically scheduled workshare, even if the sections aren't ordered.
778 //
779 
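//
// Illustrative sketch -- not part of the runtime. Roughly the pattern gcc
// emits for a "#pragma omp sections" construct with three sections, using
// the section ids returned below. The name __gomp_example_sections is
// hypothetical.
//
#if 0
static void
__gomp_example_sections(void)
{
    unsigned id;
    for (id = GOMP_sections_start(3); id != 0; id = GOMP_sections_next()) {
        switch (id) {
        case 1: /* first section body */  break;
        case 2: /* second section body */ break;
        case 3: /* third section body */  break;
        }
    }
    GOMP_sections_end();    // implicit barrier at the end of the construct
}
#endif
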
780 unsigned
781 GOMP_sections_start(unsigned count)
782 {
783  int status;
784  kmp_int lb, ub, stride;
785  int gtid = __kmp_entry_gtid();
786  MKLOC(loc, "GOMP_sections_start");
787  KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid));
788 
789  KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
790 
791  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
792  if (status) {
793  KMP_DEBUG_ASSERT(stride == 1);
794  KMP_DEBUG_ASSERT(lb > 0);
795  KMP_ASSERT(lb == ub);
796  }
797  else {
798  lb = 0;
799  }
800 
801  KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid,
802  (unsigned)lb));
803  return (unsigned)lb;
804 }
805 
806 
807 unsigned
808 GOMP_sections_next(void)
809 {
810  int status;
811  kmp_int lb, ub, stride;
812  int gtid = __kmp_get_gtid();
813  MKLOC(loc, "GOMP_sections_next");
814  KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid));
815 
816  status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
817  if (status) {
818  KMP_DEBUG_ASSERT(stride == 1);
819  KMP_DEBUG_ASSERT(lb > 0);
820  KMP_ASSERT(lb == ub);
821  }
822  else {
823  lb = 0;
824  }
825 
826  KA_TRACE(20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid,
827  (unsigned)lb));
828  return (unsigned)lb;
829 }
830 
831 
832 void
833 GOMP_parallel_sections_start(void (*task) (void *), void *data,
834  unsigned num_threads, unsigned count)
835 {
836  int gtid = __kmp_entry_gtid();
837  int last = FALSE;
838  MKLOC(loc, "GOMP_parallel_sections_start");
839  KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
840 
841  if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
842  if (num_threads != 0) {
843  __kmp_push_num_threads(&loc, gtid, num_threads);
844  }
845  __kmp_GOMP_fork_call(&loc, gtid,
846  (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data,
847  num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1,
848  (kmp_int)count, (kmp_int)1, (kmp_int)1);
849  }
850  else {
851  __kmpc_serialized_parallel(&loc, gtid);
852  }
853 
854  KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
855 
856  KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));
857 }
858 
859 
860 void
861 GOMP_sections_end(void)
862 {
863  int gtid = __kmp_get_gtid();
864  KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
865 
866  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
867 
868  KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
869 }
870 
871 
872 void
873 GOMP_sections_end_nowait(void)
874 {
875  KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
876 }
877 
878 #ifdef __cplusplus
879  } //extern "C"
880 #endif // __cplusplus
881 
882