Intel® OpenMP* Runtime Library
kmp_taskdeps.cpp
/*
 * kmp_taskdeps.cpp
 * $Revision: 42539 $
 * $Date: 2013-07-17 11:20:01 -0500 (Wed, 17 Jul 2013) $
 */

/* <copyright>
    Copyright (c) 1997-2014 Intel Corporation. All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

</copyright> */

//#define KMP_SUPPORT_GRAPH_OUTPUT 1

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"

#if OMP_40_ENABLED

//TODO: Improve memory allocation? keep a list of pre-allocated structures? allocate in blocks? re-use finished list entries?
//TODO: don't use atomic ref counters for stack-allocated nodes.
//TODO: find an alternative to atomic refs for heap-allocated nodes?
//TODO: Finish graph output support
//TODO: kmp_lock_t seems a tad too big (and heavyweight) for this. Check other runtime locks
//TODO: Any ITT support needed?

#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static kmp_int32 kmp_node_id_seed = 0;
#endif

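// Dependence tracking overview:
//  - Every task created with 'depend' clauses gets a kmp_depnode_t that holds its
//    reference count, the list of successor nodes that must be released when it
//    completes, and (while the task is still pending) a pointer back to the task.
//  - The parent task owns a kmp_dephash_t mapping each dependence address to the
//    last writer seen so far (last_out) and the readers since that writer (last_ins);
//    newly created tasks are linked behind those nodes as predecessors.
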
static void
__kmp_init_node ( kmp_depnode_t *node )
{
    node->dn.task = NULL; // set to null initially, it will point to the right task once dependences have been processed
    node->dn.successors = NULL;
    __kmp_init_lock(&node->dn.lock);
    node->dn.nrefs = 1; // init creates the first reference to the node
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
    node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
#endif
}

static inline kmp_depnode_t *
__kmp_node_ref ( kmp_depnode_t *node )
{
    KMP_TEST_THEN_INC32(&node->dn.nrefs);
    return node;
}

static inline void
__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node )
{
    if (!node) return;

    kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1;
    if ( n == 0 ) {
        KMP_ASSERT(node->dn.nrefs == 0);
#if USE_FAST_MEMORY
        __kmp_fast_free(thread,node);
#else
        __kmp_thread_free(thread,node);
#endif
    }
}

#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid))
#define KMP_RELEASE_DEPNODE(gtid,n) __kmp_release_lock(&(n)->dn.lock,(gtid))

static void
__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list );

static const kmp_int32 kmp_dephash_log2 = 6;
static const kmp_int32 kmp_dephash_size = (1 << kmp_dephash_log2);

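// The dependence hash is one allocation: the kmp_dephash_t header followed by the
// bucket array (note that __kmp_dephash_create reserves sizeof(kmp_dephash_entry_t)
// per bucket even though each bucket only stores a pointer). Each bucket is a singly
// linked list of kmp_dephash_entry_t, one entry per distinct dependence address.
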
static inline kmp_int32
__kmp_dephash_hash ( kmp_intptr_t addr )
{
    //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets );
    return ((addr >> kmp_dephash_log2) ^ addr) % kmp_dephash_size;
}

static kmp_dephash_t *
__kmp_dephash_create ( kmp_info_t *thread )
{
    kmp_dephash_t *h;

    kmp_int32 size = kmp_dephash_size * sizeof(kmp_dephash_entry_t) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
    h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size );
#else
    h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size );
#endif

#ifdef KMP_DEBUG
    h->nelements = 0;
    h->nconflicts = 0; // also reset the conflict counter that __kmp_dephash_find() increments
#endif
    h->buckets = (kmp_dephash_entry **)(h+1);

    for ( kmp_int32 i = 0; i < kmp_dephash_size; i++ )
        h->buckets[i] = 0;

    return h;
}

static void
__kmp_dephash_free ( kmp_info_t *thread, kmp_dephash_t *h )
{
    for ( kmp_int32 i = 0; i < kmp_dephash_size; i++ ) {
        if ( h->buckets[i] ) {
            kmp_dephash_entry_t *next;
            for ( kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next ) {
                next = entry->next_in_bucket;
                __kmp_depnode_list_free(thread,entry->last_ins);
                __kmp_node_deref(thread,entry->last_out);
#if USE_FAST_MEMORY
                __kmp_fast_free(thread,entry);
#else
                __kmp_thread_free(thread,entry);
#endif
            }
        }
    }
#if USE_FAST_MEMORY
    __kmp_fast_free(thread,h);
#else
    __kmp_thread_free(thread,h);
#endif
}

static kmp_dephash_entry *
__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr )
{
    kmp_int32 bucket = __kmp_dephash_hash(addr);

    kmp_dephash_entry_t *entry;
    for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket )
        if ( entry->addr == addr ) break;

    if ( entry == NULL ) {
        // create entry. This is only done by one thread so no locking required
#if USE_FAST_MEMORY
        entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) );
#else
        entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) );
#endif
        entry->addr = addr;
        entry->last_out = NULL;
        entry->last_ins = NULL;
        entry->next_in_bucket = h->buckets[bucket];
        h->buckets[bucket] = entry;
#ifdef KMP_DEBUG
        h->nelements++;
        if ( entry->next_in_bucket ) h->nconflicts++;
#endif
    }
    return entry;
}

static kmp_depnode_list_t *
__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node )
{
    kmp_depnode_list_t *new_head;

#if USE_FAST_MEMORY
    new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t));
#else
    new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t));
#endif

    new_head->node = __kmp_node_ref(node);
    new_head->next = list;

    return new_head;
}

static void
__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list )
{
    kmp_depnode_list *next;

    for ( ; list ; list = next ) {
        next = list->next;

        __kmp_node_deref(thread,list->node);
#if USE_FAST_MEMORY
        __kmp_fast_free(thread,list);
#else
        __kmp_thread_free(thread,list);
#endif
    }
}

static inline void
__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink )
{
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
    kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink->dn.task); // this can be NULL when if(0) ...

    __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource);
#endif
}

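// __kmp_process_deps() walks one dependence list for the task represented by 'node'.
// With filter=true, entries whose base_addr was zeroed by the duplicate filter in
// __kmp_check_deps() are skipped. For each remaining address it looks up the dephash
// entry and then:
//  - an OUT/INOUT dependence is linked behind every task in last_ins (or, if there are
//    none, behind last_out) and, unless dep_barrier is set, becomes the new last_out;
//  - an IN dependence is linked behind last_out and appended to last_ins;
//  - with dep_barrier set (a wait on dependences) the node is still linked behind the
//    existing producers, but the entry is drained instead of recording the node, since
//    those outputs are guaranteed to be complete once the wait finishes.
// The return value is the number of predecessors that had not yet completed.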
template< bool filter >
static inline kmp_int32
__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
                     bool dep_barrier, kmp_int32 ndeps, kmp_depend_info_t *dep_list )
{
    KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) );

    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_int32 npredecessors = 0;
    for ( kmp_int32 i = 0; i < ndeps ; i++ ) {
        const kmp_depend_info_t * dep = &dep_list[i];

        KMP_DEBUG_ASSERT(dep->flags.in);

        if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries

        kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr);
        kmp_depnode_t *last_out = info->last_out;

        if ( dep->flags.out && info->last_ins ) {
            for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) {
                kmp_depnode_t * indep = p->node;
                if ( indep->dn.task ) {
                    KMP_ACQUIRE_DEPNODE(gtid,indep);
                    if ( indep->dn.task ) {
                        __kmp_track_dependence(indep,node);
                        indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node);
                        KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
                                     filter, gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
                        npredecessors++;
                    }
                    KMP_RELEASE_DEPNODE(gtid,indep);
                }
            }

            __kmp_depnode_list_free(thread,info->last_ins);
            info->last_ins = NULL;

        } else if ( last_out && last_out->dn.task ) {
            KMP_ACQUIRE_DEPNODE(gtid,last_out);
            if ( last_out->dn.task ) {
                __kmp_track_dependence(last_out,node);
                last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node);
                KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
                             filter, gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));

                npredecessors++;
            }
            KMP_RELEASE_DEPNODE(gtid,last_out);
        }

        if ( dep_barrier ) {
            // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after
            // the execution of this task so the previous output nodes can be cleared.
            __kmp_node_deref(thread,last_out);
            info->last_out = NULL;
        } else {
            if ( dep->flags.out ) {
                __kmp_node_deref(thread,last_out);
                info->last_out = __kmp_node_ref(node);
            } else
                info->last_ins = __kmp_add_node(thread, info->last_ins, node);
        }

    }

    KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) );

    return npredecessors;
}

#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)

// returns true if the task has any outstanding dependence
static bool
__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash, bool dep_barrier,
                   kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                   kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
{
    int i;

    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d possibly aliased dependencies, %d non-aliased dependencies : dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ) );

    // Filter deps in dep_list
    // TODO: Different algorithm for large dep_list ( > 10 ? )
    for ( i = 0; i < ndeps; i ++ ) {
        if ( dep_list[i].base_addr != 0 )
            for ( int j = i+1; j < ndeps; j++ )
                if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
                    dep_list[i].flags.in |= dep_list[j].flags.in;
                    dep_list[i].flags.out |= dep_list[j].flags.out;
                    dep_list[j].base_addr = 0; // Mark j element as void
                }
    }

    // doesn't need to be atomic as no other thread is going to be accessing this node just yet
    // npredecessors is set to -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependences
    node->dn.npredecessors = -1;

    // used to pack all npredecessors additions into a single atomic operation at the end
    int npredecessors;

    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps, dep_list);
    npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list);

    node->dn.task = task;
    KMP_MB();

    // Account for our initial fake value
    npredecessors++;

    // Update predecessors and obtain current value to check if there are still any outstanding dependences (some tasks may have finished while we processed the dependences)
    npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) + npredecessors;

    KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", gtid, npredecessors, taskdata ) );

    // beyond this point the task could be queued (and executed) by a releasing task...
    return npredecessors > 0 ? true : false;
}

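// __kmp_release_deps() runs when 'task' completes: it frees the dependence hash the
// task kept for its children, clears dn.task (so no new edges are added behind this
// node), and decrements npredecessors on every recorded successor, scheduling for
// execution any successor whose count reaches zero.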
void
__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
{
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_depnode_t *node = task->td_depnode;

    if ( task->td_dephash ) {
        KA_TRACE(40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
        __kmp_dephash_free(thread,task->td_dephash);
    }

    if ( !node ) return;

    KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", gtid, task ) );

    KMP_ACQUIRE_DEPNODE(gtid,node);
    node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
    KMP_RELEASE_DEPNODE(gtid,node);

    kmp_depnode_list_t *next;
    for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) {
        kmp_depnode_t *successor = p->node;
        kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1;

        // successor task can be NULL for wait_depends or because deps are still being processed
        if ( npredecessors == 0 ) {
            KMP_MB();
            if ( successor->dn.task ) {
                KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) );
                __kmp_omp_task(gtid,successor->dn.task,false);
            }
        }

        next = p->next;
        __kmp_node_deref(thread,p->node);
#if USE_FAST_MEMORY
        __kmp_fast_free(thread,p);
#else
        __kmp_thread_free(thread,p);
#endif
    }

    __kmp_node_deref(thread,node);

    KA_TRACE(20, ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", gtid, task ) );
}

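// __kmpc_omp_task_with_deps: compiler entry point for scheduling a task that carries
// 'depend' clauses. dep_list holds the possibly aliased dependences and
// noalias_dep_list those known not to alias. If the task still has unfinished
// predecessors it is registered behind them and TASK_CURRENT_NOT_QUEUED is returned;
// otherwise (including in serialized contexts, where dependences are ignored) the
// call falls through to the regular __kmpc_omp_task() path.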
kmp_int32
__kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
                           kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                           kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
{

    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * current_task = thread->th.th_current_task;

    bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;

    if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) {
        /* if no dependencies have been tracked yet, create the dependence hash */
        if ( current_task->td_dephash == NULL )
            current_task->td_dephash = __kmp_dephash_create(thread);

#if USE_FAST_MEMORY
        kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t));
#else
        kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t));
#endif

        __kmp_init_node(node);
        new_taskdata->td_depnode = node;

        if ( __kmp_check_deps( gtid, node, new_task, current_task->td_dephash, NO_DEP_BARRIER,
                               ndeps, dep_list, ndeps_noalias, noalias_dep_list ) ) {
            KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking dependencies: "
                          "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
                          new_taskdata ) );
            return TASK_CURRENT_NOT_QUEUED;
        }
    }

    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking dependencies : "
                  "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref,
                  new_taskdata ) );

    return __kmpc_omp_task(loc_ref,gtid,new_task);
}

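// __kmpc_omp_wait_deps: compiler entry point for waiting on a set of dependences
// (e.g. an undeferred task such as 'task if(0) depend(...)' that must still honor its
// depend clauses). It builds a stack-allocated depnode, registers it behind the
// outstanding producers with DEP_BARRIER semantics, and executes other tasks until
// all of its predecessors have completed.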
void
__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                       kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
{
    KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref) );

    if ( ndeps == 0 && ndeps_noalias == 0 ) {
        KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to wait upon : loc=%p\n", gtid, loc_ref) );
        return;
    }

    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * current_task = thread->th.th_current_task;

    // We can return immediately as:
    //  - dependences are not computed in serial teams
    //  - if the dephash is not yet created it means we have nothing to wait for
    if ( current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final || current_task->td_dephash == NULL ) {
        KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
        return;
    }

    kmp_depnode_t node;
    __kmp_init_node(&node);

    if (!__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER,
                           ndeps, dep_list, ndeps_noalias, noalias_dep_list )) {
        KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
        return;
    }

    int thread_finished = FALSE;
    kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U);
    while ( node.dn.npredecessors > 0 ) {
        flag.execute_tasks(thread, gtid, FALSE, &thread_finished,
#if USE_ITT_BUILD
                           NULL,
#endif
                           __kmp_task_stealing_constraint );
    }

    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref) );
}

#endif /* OMP_40_ENABLED */

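// Usage sketch (not part of the runtime): a rough picture of what a compiler lowers
// onto the entry points above. 'produce' and 'consume' are hypothetical user
// functions, and the outlining of the task bodies is omitted.
//
//     int x;
//     #pragma omp task depend(out: x)   // lowered to __kmpc_omp_task_alloc() plus
//     x = produce();                    // __kmpc_omp_task_with_deps() with a single
//                                       // kmp_depend_info_t { &x, sizeof(x), {in=1,out=1} }
//     #pragma omp task depend(in: x)    // same, but with flags {in=1,out=0}; this task is
//     consume(x);                       // only queued once the producer runs __kmp_release_deps()
//     #pragma omp taskwait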