Intel® OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Groups Pages
z_Windows_NT_util.c
1 /*
2  * z_Windows_NT_util.c -- platform specific routines.
3  * $Revision: 42518 $
4  * $Date: 2013-07-15 11:12:26 -0500 (Mon, 15 Jul 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp.h"
38 #include "kmp_itt.h"
39 #include "kmp_i18n.h"
40 #include "kmp_io.h"
41 
42 
43 
44 /* ----------------------------------------------------------------------------------- */
45 /* ----------------------------------------------------------------------------------- */
46 
47 /* This code is related to NtQuerySystemInformation() function. This function
48  is used in the Load balance algorithm for OMP_DYNAMIC=true to find the
49  number of running threads in the system. */
50 
51 #include <ntstatus.h>
52 #include <ntsecapi.h> // UNICODE_STRING
53 
// Subset of the (undocumented) information classes accepted by
// NtQuerySystemInformation(); only SystemProcessInformation is used here.
enum SYSTEM_INFORMATION_CLASS {
    SystemProcessInformation = 5
}; // SYSTEM_INFORMATION_CLASS
57 
// Process/thread id pair, as returned inside SYSTEM_THREAD records.
struct CLIENT_ID {
    HANDLE UniqueProcess;   // process id
    HANDLE UniqueThread;    // thread id
}; // struct CLIENT_ID
62 
// Scheduler state of a thread as reported by the kernel in
// SYSTEM_THREAD::State.  Enumerator order must match the OS values.
enum THREAD_STATE {
    StateInitialized,
    StateReady,
    StateRunning,       // currently executing on a processor
    StateStandby,
    StateTerminated,
    StateWait,
    StateTransition,
    StateUnknown
}; // enum THREAD_STATE
73 
// Virtual-memory usage counters embedded in SYSTEM_PROCESS_INFORMATION.
// Field order/sizes must match the kernel's layout (see offset asserts below).
struct VM_COUNTERS {
    SIZE_T PeakVirtualSize;
    SIZE_T VirtualSize;
    ULONG PageFaultCount;
    SIZE_T PeakWorkingSetSize;
    SIZE_T WorkingSetSize;
    SIZE_T QuotaPeakPagedPoolUsage;
    SIZE_T QuotaPagedPoolUsage;
    SIZE_T QuotaPeakNonPagedPoolUsage;
    SIZE_T QuotaNonPagedPoolUsage;
    SIZE_T PagefileUsage;
    SIZE_T PeakPagefileUsage;
    SIZE_T PrivatePageCount;
}; // struct VM_COUNTERS
88 
// Per-thread record inside SYSTEM_PROCESS_INFORMATION.  The layout must
// match what NtQuerySystemInformation(SystemProcessInformation) returns;
// the KMP_BUILD_ASSERTs pin the critical field offsets per architecture.
struct SYSTEM_THREAD {
    LARGE_INTEGER KernelTime;
    LARGE_INTEGER UserTime;
    LARGE_INTEGER CreateTime;
    ULONG WaitTime;
    LPVOID StartAddress;
    CLIENT_ID ClientId;
    DWORD Priority;
    LONG BasePriority;
    ULONG ContextSwitchCount;
    THREAD_STATE State;     // e.g. StateRunning; used for load-balance counting
    ULONG WaitReason;
}; // SYSTEM_THREAD

// Compile-time layout checks against the OS's expected offsets.
KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, KernelTime ) == 0 );
#if KMP_ARCH_X86
    KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 28 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 52 );
#else
    KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 32 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 68 );
#endif
111 
// One per-process record in the buffer returned by
// NtQuerySystemInformation(SystemProcessInformation).  Records are chained
// via NextEntryOffset; Threads[] actually holds NumberOfThreads entries
// (declared as [1] in the pre-C99 "trailing array" idiom).
struct SYSTEM_PROCESS_INFORMATION {
    ULONG NextEntryOffset;      // byte offset to the next record, 0 at the end
    ULONG NumberOfThreads;      // number of valid entries in Threads[]
    LARGE_INTEGER Reserved[ 3 ];
    LARGE_INTEGER CreateTime;
    LARGE_INTEGER UserTime;
    LARGE_INTEGER KernelTime;
    UNICODE_STRING ImageName;
    DWORD BasePriority;
    HANDLE ProcessId;
    HANDLE ParentProcessId;
    ULONG HandleCount;
    ULONG Reserved2[ 2 ];
    VM_COUNTERS VMCounters;
    IO_COUNTERS IOCounters;
    SYSTEM_THREAD Threads[ 1 ];   // variable-length in practice
}; // SYSTEM_PROCESS_INFORMATION
typedef SYSTEM_PROCESS_INFORMATION * PSYSTEM_PROCESS_INFORMATION;

// Compile-time layout checks against the OS's expected offsets.
KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, NextEntryOffset ) == 0 );
KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, CreateTime ) == 32 );
KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ImageName ) == 56 );
#if KMP_ARCH_X86
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 68 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 76 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 88 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 136 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 184 );
#else
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 80 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 96 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 112 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 208 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 256 );
#endif
147 
// Signature of ntdll's NtQuerySystemInformation(); the pointer is resolved
// at runtime in __kmp_runtime_initialize() (no import-library linkage).
typedef NTSTATUS (NTAPI *NtQuerySystemInformation_t)( SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG );
NtQuerySystemInformation_t NtQuerySystemInformation = NULL;

HMODULE ntdll = NULL;   // handle to ntdll.dll, loaded via full system path

/* End of NtQuerySystemInformation()-related code */

#if KMP_ARCH_X86_64
static HMODULE kernel32 = NULL;   // for processor-group APIs (Windows 7+)
#endif /* KMP_ARCH_X86_64 */
158 
159 /* ----------------------------------------------------------------------------------- */
160 /* ----------------------------------------------------------------------------------- */
161 
162 
163 // Why do we have multiple copies of __kmp_static_delay() and __kmp_static_yield() in many files?
#ifdef KMP_DEBUG

// Debug-only delay stub: merely sanity-checks its argument.
// In release builds (below) it expands to nothing.
static void
__kmp_static_delay( int arg ) {
    /* Work around weird code-gen bug that causes assert to trip */
    #if KMP_ARCH_X86_64 && KMP_OS_LINUX
        KMP_ASSERT( arg != 0 );
    #else
        KMP_ASSERT( arg >= 0 );
    #endif
}

#else

    #define __kmp_static_delay( arg ) /* nothing to do */

#endif /* KMP_DEBUG */
181 
// Thin wrapper: conditionally yield via __kmp_yield() (arg is the condition).
static void
__kmp_static_yield( int arg )
{
    __kmp_yield( arg );
}
187 
#if KMP_HANDLE_SIGNALS
    typedef void (* sig_func_t )( int );
    // Per-signal bookkeeping for the signal-handling code (defined elsewhere
    // in this file): previous handlers and installed flags, indexed by signo.
    static sig_func_t __kmp_sighldrs[ NSIG ];
    static int __kmp_siginstalled[ NSIG ];
#endif

static HANDLE __kmp_monitor_ev;       // event used by the monitor thread code
static kmp_int64 __kmp_win32_time;    // timer base value (see timing code)
double __kmp_win32_tick;              // timer tick length (see timing code)

int __kmp_init_runtime = FALSE;       // TRUE once __kmp_runtime_initialize() completes
CRITICAL_SECTION __kmp_win32_section; // global lock taken by __kmp_disable()
200 
/* Initialize the mutex wrapper around a Win32 CRITICAL_SECTION. */
void
__kmp_win32_mutex_init( kmp_win32_mutex_t *mx )
{
    InitializeCriticalSection( & mx->cs );
#if USE_ITT_BUILD
    // Register the object with ITT for analysis tools.
    __kmp_itt_system_object_created( & mx->cs, "Critical Section" );
#endif /* USE_ITT_BUILD */
}
209 
/* Release the OS resources held by the mutex's CRITICAL_SECTION. */
void
__kmp_win32_mutex_destroy( kmp_win32_mutex_t *mx )
{
    DeleteCriticalSection( & mx->cs );
}
215 
/* Acquire the mutex (blocking). */
void
__kmp_win32_mutex_lock( kmp_win32_mutex_t *mx )
{
    EnterCriticalSection( & mx->cs );
}
221 
/* Release the mutex. */
void
__kmp_win32_mutex_unlock( kmp_win32_mutex_t *mx )
{
    LeaveCriticalSection( & mx->cs );
}
227 
/*
 * Initialize a condition-variable emulation built from a manual-reset event
 * plus generation counting (Win32 has no native condvar pre-Vista).
 */
void
__kmp_win32_cond_init( kmp_win32_cond_t *cv )
{
    cv->waiters_count_ = 0;           // threads currently blocked in cond_wait
    cv->wait_generation_count_ = 0;   // incremented by each broadcast
    cv->release_count_ = 0;           // waiters still to be released

    /* Initialize the critical section */
    __kmp_win32_mutex_init( & cv->waiters_count_lock_ );

    /* Create a manual-reset event. */
    cv->event_ = CreateEvent( NULL,   // no security
                              TRUE,   // manual-reset
                              FALSE,  // non-signaled initially
                              NULL ); // unnamed
#if USE_ITT_BUILD
    __kmp_itt_system_object_created( cv->event_, "Event" );
#endif /* USE_ITT_BUILD */
}
247 
/* Destroy the condvar: free the lock, close the event, scrub the struct. */
void
__kmp_win32_cond_destroy( kmp_win32_cond_t *cv )
{
    __kmp_win32_mutex_destroy( & cv->waiters_count_lock_ );
    __kmp_free_handle( cv->event_ );
    memset( cv, '\0', sizeof( *cv ) );
}
255 
256 /* TODO associate cv with a team instead of a thread so as to optimize
257  * the case where we wake up a whole team */
258 
/*
 * Wait on the emulated condition variable.  Caller must hold *mx on entry;
 * it is dropped while waiting and re-acquired before returning.
 *
 * Generation counting distinguishes waiters that were already blocked when
 * a broadcast occurred from threads that arrive afterwards: a waiter may
 * only leave when release_count_ > 0 AND the generation has advanced past
 * the one it recorded on entry.
 *
 * The th / need_decrease_load parameters are currently unused (callers in
 * this file pass 0) -- presumably reserved for load-balance accounting.
 */
void
__kmp_win32_cond_wait( kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, kmp_info_t *th, int need_decrease_load )
{
    int my_generation;
    int last_waiter;

    /* Avoid race conditions */
    __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );

    /* Increment count of waiters */
    cv->waiters_count_++;

    /* Store current generation in our activation record. */
    my_generation = cv->wait_generation_count_;

    __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
    __kmp_win32_mutex_unlock( mx );

    for (;;) {
        int wait_done;

        /* Wait until the event is signaled */
        WaitForSingleObject( cv->event_, INFINITE );

        __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );

        /* Exit the loop when the <cv->event_> is signaled and
         * there are still waiting threads from this <wait_generation>
         * that haven't been released from this wait yet. */
        wait_done = ( cv->release_count_ > 0 ) &&
                    ( cv->wait_generation_count_ != my_generation );

        __kmp_win32_mutex_unlock( &cv->waiters_count_lock_);

        /* there used to be a semicolon after the if statement,
         * it looked like a bug, so i removed it */
        if( wait_done )
            break;
    }

    /* Re-acquire the caller's mutex before touching the shared counters. */
    __kmp_win32_mutex_lock( mx );
    __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );

    cv->waiters_count_--;
    cv->release_count_--;

    last_waiter = ( cv->release_count_ == 0 );

    __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );

    if( last_waiter ) {
        /* We're the last waiter to be notified, so reset the manual event. */
        ResetEvent( cv->event_ );
    }
}
315 
/*
 * Wake every thread currently waiting on cv.  Sets the manual-reset event,
 * records how many waiters must be released, and starts a new generation so
 * late arrivals do not consume this broadcast.
 */
void
__kmp_win32_cond_broadcast( kmp_win32_cond_t *cv )
{
    __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );

    if( cv->waiters_count_ > 0 ) {
        SetEvent( cv->event_ );
        /* Release all the threads in this generation. */

        cv->release_count_ = cv->waiters_count_;

        /* Start a new generation. */
        cv->wait_generation_count_++;
    }

    __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
}
333 
/* Signal is implemented as a broadcast (wakes all waiters, not just one). */
void
__kmp_win32_cond_signal( kmp_win32_cond_t *cv )
{
    __kmp_win32_cond_broadcast( cv );
}
339 
340 /* ------------------------------------------------------------------------ */
341 /* ------------------------------------------------------------------------ */
342 
/*
 * Re-enable (leave the global critical section entered by __kmp_disable).
 * new_state is unused on Windows* OS; no-op before runtime initialization.
 */
void
__kmp_enable( int new_state )
{
    if (__kmp_init_runtime)
        LeaveCriticalSection( & __kmp_win32_section );
}
349 
/*
 * Enter the global critical section (paired with __kmp_enable).
 * *old_state is always reported as 0 on Windows* OS.
 */
void
__kmp_disable( int *old_state )
{
    *old_state = 0;

    if (__kmp_init_runtime)
        EnterCriticalSection( & __kmp_win32_section );
}
358 
/* Global suspend-machinery setup: nothing needed on Windows* OS
   (per-thread objects are created lazily in __kmp_suspend_initialize_thread). */
void
__kmp_suspend_initialize( void )
{
    /* do nothing */
}
364 
/* Lazily create the per-thread suspend condvar + mutex, exactly once
   (guarded by the th_suspend_init flag, written last). */
static void
__kmp_suspend_initialize_thread( kmp_info_t *th )
{
    if ( ! TCR_4( th->th.th_suspend_init ) ) {
        /* this means we haven't initialized the suspension pthread objects for this thread
           in this instance of the process */
        __kmp_win32_cond_init( &th->th.th_suspend_cv );
        __kmp_win32_mutex_init( &th->th.th_suspend_mx );
        TCW_4( th->th.th_suspend_init, TRUE );
    }
}
376 
/* Tear down the per-thread suspend condvar + mutex if they were created. */
void
__kmp_suspend_uninitialize_thread( kmp_info_t *th )
{
    if ( TCR_4( th->th.th_suspend_init ) ) {
        /* this means we have initialized the suspension pthread objects for this thread
           in this instance of the process */
        __kmp_win32_cond_destroy( & th->th.th_suspend_cv );
        __kmp_win32_mutex_destroy( & th->th.th_suspend_mx );
        TCW_4( th->th.th_suspend_init, FALSE );
    }
}
388 
389 /*
390  * This routine puts the calling thread to sleep after setting the
391  * sleep bit for the indicated spin variable to true.
392  */
393 
394 void
395 __kmp_suspend( int th_gtid, volatile kmp_uint *spinner, kmp_uint checker )
396 {
397  kmp_info_t *th = __kmp_threads[th_gtid];
398  int status;
399  kmp_uint old_spin;
400 
401  KF_TRACE( 30, ("__kmp_suspend: T#%d enter for spin = %p\n", th_gtid, spinner ) );
402 
403  __kmp_suspend_initialize_thread( th );
404 
405  __kmp_win32_mutex_lock( &th->th.th_suspend_mx );
406 
407  KF_TRACE( 10, ( "__kmp_suspend: T#%d setting sleep bit for spin(%p)\n",
408  th_gtid, spinner ) );
409 
410  /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
411  gets called first?
412  */
413  old_spin = __kmp_test_then_or32( (volatile kmp_int32 *) spinner,
414  KMP_BARRIER_SLEEP_STATE );
415 
416  KF_TRACE( 5, ( "__kmp_suspend: T#%d set sleep bit for spin(%p)==%d\n",
417  th_gtid, spinner, *spinner ) );
418 
419  if ( old_spin == checker ) {
420  __kmp_test_then_and32( (volatile kmp_int32 *) spinner, ~(KMP_BARRIER_SLEEP_STATE) );
421 
422  KF_TRACE( 5, ( "__kmp_suspend: T#%d false alarm, reset sleep bit for spin(%p)\n",
423  th_gtid, spinner) );
424  } else {
425 #ifdef DEBUG_SUSPEND
426  __kmp_suspend_count++;
427 #endif
428 
429  /* Encapsulate in a loop as the documentation states that this may
430  * "with low probability" return when the condition variable has
431  * not been signaled or broadcast
432  */
433  int deactivated = FALSE;
434  TCW_PTR(th->th.th_sleep_loc, spinner);
435  while ( TCR_4( *spinner ) & KMP_BARRIER_SLEEP_STATE ) {
436 
437  KF_TRACE( 15, ("__kmp_suspend: T#%d about to perform kmp_win32_cond_wait()\n",
438  th_gtid ) );
439 
440 
441  //
442  // Mark the thread as no longer active
443  // (only in the first iteration of the loop).
444  //
445  if ( ! deactivated ) {
446  th->th.th_active = FALSE;
447  if ( th->th.th_active_in_pool ) {
448  th->th.th_active_in_pool = FALSE;
449  KMP_TEST_THEN_DEC32(
450  (kmp_int32 *) &__kmp_thread_pool_active_nth );
451  KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
452  }
453  deactivated = TRUE;
454 
455 
456  __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
457  }
458  else {
459  __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
460  }
461 
462 #ifdef KMP_DEBUG
463  if( (*spinner) & KMP_BARRIER_SLEEP_STATE ) {
464  KF_TRACE( 100, ("__kmp_suspend: T#%d spurious wakeup\n", th_gtid ));
465  }
466 #endif /* KMP_DEBUG */
467 
468  } // while
469 
470  //
471  // Mark the thread as active again
472  // (if it was previous marked as inactive)
473  //
474  if ( deactivated ) {
475  th->th.th_active = TRUE;
476  if ( TCR_4(th->th.th_in_pool) ) {
477  KMP_TEST_THEN_INC32(
478  (kmp_int32 *) &__kmp_thread_pool_active_nth );
479  th->th.th_active_in_pool = TRUE;
480  }
481  }
482  }
483 
484 
485  __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
486 
487  KF_TRACE( 30, ("__kmp_suspend: T#%d exit\n", th_gtid ) );
488 }
489 
490 /* This routine signals the thread specified by target_gtid to wake up
491  * after setting the sleep bit indicated by the spin argument to FALSE
492  */
493 void
494 __kmp_resume( int target_gtid, volatile kmp_uint *spin )
495 {
496  kmp_info_t *th = __kmp_threads[target_gtid];
497  int status;
498  kmp_uint32 old_spin;
499 
500 #ifdef KMP_DEBUG
501  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
502 #endif
503 
504  KF_TRACE( 30, ( "__kmp_resume: T#%d wants to wakeup T#%d enter\n",
505  gtid, target_gtid ) );
506 
507  __kmp_suspend_initialize_thread( th );
508 
509  __kmp_win32_mutex_lock( &th->th.th_suspend_mx );
510 
511  if ( spin == NULL ) {
512  spin = (volatile kmp_uint *)TCR_PTR(th->th.th_sleep_loc);
513  if ( spin == NULL ) {
514  KF_TRACE( 5, ( "__kmp_resume: T#%d exiting, thread T#%d already awake - spin(%p)\n",
515  gtid, target_gtid, spin ) );
516 
517  __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
518  return;
519  }
520  }
521 
522  TCW_PTR(th->th.th_sleep_loc, NULL);
523  old_spin = __kmp_test_then_and32( (kmp_int32 volatile *) spin, ~( KMP_BARRIER_SLEEP_STATE ) );
524 
525  if ( ( old_spin & KMP_BARRIER_SLEEP_STATE ) == 0 ) {
526  KF_TRACE( 5, ( "__kmp_resume: T#%d exiting, thread T#%d already awake - spin(%p): "
527  "%u => %u\n",
528  gtid, target_gtid, spin, old_spin, *spin ) );
529 
530  __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
531  return;
532  }
533  TCW_PTR(th->th.th_sleep_loc, NULL);
534 
535  KF_TRACE( 5, ( "__kmp_resume: T#%d about to wakeup T#%d, reset sleep bit for spin(%p)\n",
536  gtid, target_gtid, spin) );
537 
538 
539  __kmp_win32_cond_signal( &th->th.th_suspend_cv );
540 
541  __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
542 
543  KF_TRACE( 30, ( "__kmp_resume: T#%d exiting after signaling wake up for T#%d\n",
544  gtid, target_gtid ) );
545 }
546 
547 /* ------------------------------------------------------------------------ */
548 /* ------------------------------------------------------------------------ */
549 
550 void
551 __kmp_yield( int cond )
552 {
553  if (cond)
554  Sleep(0);
555 }
556 
557 /* ------------------------------------------------------------------------ */
558 /* ------------------------------------------------------------------------ */
559 
/*
 * Store gtid in this thread's TLS slot.  The value is biased by +1 so that
 * gtid 0 is distinguishable from TlsGetValue()'s "no value" result of 0.
 */
void
__kmp_gtid_set_specific( int gtid )
{
    KA_TRACE( 50, ("__kmp_gtid_set_specific: T#%d key:%d\n",
                   gtid, __kmp_gtid_threadprivate_key ));
    KMP_ASSERT( __kmp_init_runtime );
    if( ! TlsSetValue( __kmp_gtid_threadprivate_key, (LPVOID)(gtid+1)) )
        KMP_FATAL( TLSSetValueFailed );
}
569 
570 int
571 __kmp_gtid_get_specific()
572 {
573  int gtid;
574  if( !__kmp_init_runtime ) {
575  KA_TRACE( 50, ("__kmp_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) );
576  return KMP_GTID_SHUTDOWN;
577  }
578  gtid = (int)(kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key );
579  if ( gtid == 0 ) {
580  gtid = KMP_GTID_DNE;
581  }
582  else {
583  gtid--;
584  }
585  KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
586  __kmp_gtid_threadprivate_key, gtid ));
587  return gtid;
588 }
589 
590 /* ------------------------------------------------------------------------ */
591 /* ------------------------------------------------------------------------ */
592 
593 #if KMP_ARCH_X86_64
594 
595 //
596 // Only 1 DWORD in the mask should have any procs set.
597 // Return the appropriate index, or -1 for an invalid mask.
598 //
599 int
600 __kmp_get_proc_group( kmp_affin_mask_t const *mask )
601 {
602  int i;
603  int group = -1;
604  struct GROUP_AFFINITY new_ga, prev_ga;
605  for (i = 0; i < __kmp_num_proc_groups; i++) {
606  if (mask[i] == 0) {
607  continue;
608  }
609  if (group >= 0) {
610  return -1;
611  }
612  group = i;
613  }
614  return group;
615 }
616 
617 #endif /* KMP_ARCH_X86_64 */
618 
/*
 * Apply *mask as the calling thread's affinity.
 * Returns 0 on success, -1 for an invalid multi-group mask, or the Windows
 * error code of the failing API call (fatal first if abort_on_error).
 */
int
__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
{

#if KMP_ARCH_X86_64

    if (__kmp_num_proc_groups > 1) {
        //
        // Check for a valid mask.
        //
        struct GROUP_AFFINITY ga;
        int group = __kmp_get_proc_group( mask );
        if (group < 0) {
            if (abort_on_error) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
            return -1;
        }

        //
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        //
        ga.group = group;
        ga.mask = mask[group];
        ga.reserved[0] = ga.reserved[1] = ga.reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
            DWORD error = GetLastError();
            if (abort_on_error) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG( CantSetThreadAffMask ),
                    KMP_ERR( error ),
                    __kmp_msg_null
                );
            }
            return error;
        }
    }
    else

#endif /* KMP_ARCH_X86_64 */

    {
        /* Single processor group: the plain affinity-mask API suffices. */
        if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
            DWORD error = GetLastError();
            if (abort_on_error) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG( CantSetThreadAffMask ),
                    KMP_ERR( error ),
                    __kmp_msg_null
                );
            }
            return error;
        }
    }
    return 0;
}
680 
681 int
682 __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
683 {
684 
685 #if KMP_ARCH_X86_64
686 
687  if (__kmp_num_proc_groups > 1) {
688  KMP_CPU_ZERO(mask);
689  struct GROUP_AFFINITY ga;
690  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
691 
692  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
693  DWORD error = GetLastError();
694  if (abort_on_error) {
695  __kmp_msg(
696  kmp_ms_fatal,
697  KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
698  KMP_ERR(error),
699  __kmp_msg_null
700  );
701  }
702  return error;
703  }
704 
705  if ((ga.group < 0) || (ga.group > __kmp_num_proc_groups)
706  || (ga.mask == 0)) {
707  return -1;
708  }
709 
710  mask[ga.group] = ga.mask;
711  }
712  else
713 
714 #endif /* KMP_ARCH_X86_64 */
715 
716  {
717  kmp_affin_mask_t newMask, sysMask, retval;
718 
719  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
720  DWORD error = GetLastError();
721  if (abort_on_error) {
722  __kmp_msg(
723  kmp_ms_fatal,
724  KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
725  KMP_ERR(error),
726  __kmp_msg_null
727  );
728  }
729  return error;
730  }
731  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
732  if (! retval) {
733  DWORD error = GetLastError();
734  if (abort_on_error) {
735  __kmp_msg(
736  kmp_ms_fatal,
737  KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
738  KMP_ERR(error),
739  __kmp_msg_null
740  );
741  }
742  return error;
743  }
744  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
745  if (! newMask) {
746  DWORD error = GetLastError();
747  if (abort_on_error) {
748  __kmp_msg(
749  kmp_ms_fatal,
750  KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
751  KMP_ERR(error),
752  __kmp_msg_null
753  );
754  }
755  }
756  *mask = retval;
757  }
758  return 0;
759 }
760 
761 void
762 __kmp_affinity_bind_thread( int proc )
763 {
764 
765 #if KMP_ARCH_X86_64
766 
767  if (__kmp_num_proc_groups > 1) {
768  //
769  // Form the GROUP_AFFINITY struct directly, rather than filling
770  // out a bit vector and calling __kmp_set_system_affinity().
771  //
772  struct GROUP_AFFINITY ga;
773  KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups
774  * CHAR_BIT * sizeof(DWORD_PTR))));
775  ga.group = proc / (CHAR_BIT * sizeof(DWORD_PTR));
776  ga.mask = 1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR)));
777  ga.reserved[0] = ga.reserved[1] = ga.reserved[2] = 0;
778 
779  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
780  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
781  DWORD error = GetLastError();
782  if (__kmp_affinity_verbose) { // AC: continue silently if not verbose
783  __kmp_msg(
784  kmp_ms_warning,
785  KMP_MSG( CantSetThreadAffMask ),
786  KMP_ERR( error ),
787  __kmp_msg_null
788  );
789  }
790  }
791  }
792  else
793 
794 #endif /* KMP_ARCH_X86_64 */
795 
796  {
797  kmp_affin_mask_t mask;
798  KMP_CPU_ZERO(&mask);
799  KMP_CPU_SET(proc, &mask);
800  __kmp_set_system_affinity(&mask, TRUE);
801  }
802 }
803 
/*
 * Record that affinity is usable and compute the affinity-mask size.
 * env_var is unused here: all supported Windows* OS versions provide
 * SetThreadAffinityMask(), so no capability probing is needed.
 */
void
__kmp_affinity_determine_capable( const char *env_var )
{
    //
    // All versions of Windows* OS (since Win '95) support SetThreadAffinityMask().
    //

#if KMP_ARCH_X86_64
    // One mask word per processor group.
    __kmp_affin_mask_size = __kmp_num_proc_groups * sizeof(kmp_affin_mask_t);
#else
    __kmp_affin_mask_size = sizeof(kmp_affin_mask_t);
#endif

    KA_TRACE( 10, (
        "__kmp_affinity_determine_capable: "
            "Windows* OS affinity interface functional (mask size = %" KMP_SIZE_T_SPEC ").\n",
        __kmp_affin_mask_size
    ) );
}
823 
824 double
825 __kmp_read_cpu_time( void )
826 {
827  FILETIME CreationTime, ExitTime, KernelTime, UserTime;
828  int status;
829  double cpu_time;
830 
831  cpu_time = 0;
832 
833  status = GetProcessTimes( GetCurrentProcess(), &CreationTime,
834  &ExitTime, &KernelTime, &UserTime );
835 
836  if (status) {
837  double sec = 0;
838 
839  sec += KernelTime.dwHighDateTime;
840  sec += UserTime.dwHighDateTime;
841 
842  /* Shift left by 32 bits */
843  sec *= (double) (1 << 16) * (double) (1 << 16);
844 
845  sec += KernelTime.dwLowDateTime;
846  sec += UserTime.dwLowDateTime;
847 
848  cpu_time += (sec * 100.0) / NSEC_PER_SEC;
849  }
850 
851  return cpu_time;
852 }
853 
854 int
855 __kmp_read_system_info( struct kmp_sys_info *info )
856 {
857  info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */
858  info->minflt = 0; /* the number of page faults serviced without any I/O */
859  info->majflt = 0; /* the number of page faults serviced that required I/O */
860  info->nswap = 0; /* the number of times a process was "swapped" out of memory */
861  info->inblock = 0; /* the number of times the file system had to perform input */
862  info->oublock = 0; /* the number of times the file system had to perform output */
863  info->nvcsw = 0; /* the number of times a context switch was voluntarily */
864  info->nivcsw = 0; /* the number of times a context switch was forced */
865 
866  return 1;
867 }
868 
869 /* ------------------------------------------------------------------------ */
870 /* ------------------------------------------------------------------------ */
871 
872 
/*
 * One-time platform initialization for the Windows* OS port: the global
 * critical section, system tick, CPU frequency, the TLS gtid key, the
 * dynamically-resolved ntdll/kernel32 entry points, and the OS proc count
 * (__kmp_xproc).  Idempotent; sets __kmp_init_runtime when done.
 */
void
__kmp_runtime_initialize( void )
{
    SYSTEM_INFO info;
    kmp_str_buf_t path;
    UINT path_size;

    if ( __kmp_init_runtime ) {
        return;
    };

    InitializeCriticalSection( & __kmp_win32_section );
#if USE_ITT_BUILD
    __kmp_itt_system_object_created( & __kmp_win32_section, "Critical Section" );
#endif /* USE_ITT_BUILD */
    __kmp_initialize_system_tick();

    #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
        if ( ! __kmp_cpuinfo.initialized ) {
            __kmp_query_cpuid( & __kmp_cpuinfo );
        }; // if
    #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    if ( __kmp_cpu_frequency == 0 ) {
        // __kmp_hardware_timestamp() calls to QueryPerformanceCounter(). If
        // __kmp_hardware_timestamp() rewritten to use RDTSC instruction (or its 64 analog),
        // probably we should try to get frequency from __kmp_cpuinfo.frequency first (see
        // z_Linux_util.c).
        LARGE_INTEGER freq;
        BOOL rc;
        rc = QueryPerformanceFrequency( & freq );
        if ( rc ) {
            KMP_DEBUG_ASSERT( sizeof( __kmp_cpu_frequency ) >= sizeof( freq.QuadPart ) );
            KMP_DEBUG_ASSERT( freq.QuadPart >= 0 );
            __kmp_cpu_frequency = freq.QuadPart;
            KA_TRACE( 5, ( "cpu frequency: %" KMP_UINT64_SPEC "\n", __kmp_cpu_frequency ) );
        } else {
            // Frequency unavailable: mark with all-ones sentinel.
            __kmp_cpu_frequency = ~ 0;
        }; // if
    }; // if

    /* Set up minimum number of threads to switch to TLS gtid */
    #if KMP_OS_WINDOWS && ! defined GUIDEDLL_EXPORTS
        // Windows* OS, static library.
        /*
            New thread may use stack space previously used by another thread, currently terminated.
            On Windows* OS, in case of static linking, we do not know the moment of thread termination,
            and our structures (__kmp_threads and __kmp_root arrays) are still keep info about dead
            threads. This leads to problem in __kmp_get_global_thread_id() function: it wrongly
            finds gtid (by searching through stack addresses of all known threads) for unregistered
            foreign tread.

            Setting __kmp_tls_gtid_min to 0 workarounds this problem: __kmp_get_global_thread_id()
            does not search through stacks, but get gtid from TLS immediatelly.

            --ln
        */
        __kmp_tls_gtid_min = 0;
    #else
        __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
    #endif

    /* for the static library */
    if ( !__kmp_gtid_threadprivate_key ) {
        __kmp_gtid_threadprivate_key = TlsAlloc();
        if( __kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES ) {
            KMP_FATAL( TLSOutOfIndexes );
        }
    }

    //
    // Load ntdll.dll.
    //
    /*
        Simple
            GetModuleHandle( "ntdll.dl" )
        is not suitable due to security issue (see
        http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full
        path to the library.
    */
    __kmp_str_buf_init( & path );
    path_size = GetSystemDirectory( path.str, path.size );
    KMP_DEBUG_ASSERT( path_size > 0 );
    if ( path_size >= path.size ) {
        //
        // Buffer is too short. Expand the buffer and try again.
        //
        __kmp_str_buf_reserve( & path, path_size );
        path_size = GetSystemDirectory( path.str, path.size );
        KMP_DEBUG_ASSERT( path_size > 0 );
    }; // if
    if ( path_size > 0 && path_size < path.size ) {
        //
        // Now we have system directory name in the buffer.
        // Append backslash and name of dll to form full path,
        //
        path.used = path_size;
        __kmp_str_buf_print( & path, "\\%s", "ntdll.dll" );

        //
        // Now load ntdll using full path.
        //
        ntdll = GetModuleHandle( path.str );
    }

    KMP_DEBUG_ASSERT( ntdll != NULL );
    if ( ntdll != NULL ) {
        NtQuerySystemInformation = (NtQuerySystemInformation_t) GetProcAddress( ntdll, "NtQuerySystemInformation" );
    }
    KMP_DEBUG_ASSERT( NtQuerySystemInformation != NULL );

#if KMP_ARCH_X86_64
    //
    // Load kernel32.dll.
    // Same caveat - must use full system path name.
    //
    if ( path_size > 0 && path_size < path.size ) {
        //
        // Truncate the buffer back to just the system path length,
        // discarding "\\ntdll.dll", and replacing it with "kernel32.dll".
        //
        path.used = path_size;
        __kmp_str_buf_print( & path, "\\%s", "kernel32.dll" );

        //
        // Load kernel32.dll using full path.
        //
        kernel32 = GetModuleHandle( path.str );

        //
        // Load the function pointers to kernel32.dll routines
        // that may or may not exist on this system.
        //
        if ( kernel32 != NULL ) {
            __kmp_GetActiveProcessorCount = (kmp_GetActiveProcessorCount_t) GetProcAddress( kernel32, "GetActiveProcessorCount" );
            __kmp_GetActiveProcessorGroupCount = (kmp_GetActiveProcessorGroupCount_t) GetProcAddress( kernel32, "GetActiveProcessorGroupCount" );
            __kmp_GetThreadGroupAffinity = (kmp_GetThreadGroupAffinity_t) GetProcAddress( kernel32, "GetThreadGroupAffinity" );
            __kmp_SetThreadGroupAffinity = (kmp_SetThreadGroupAffinity_t) GetProcAddress( kernel32, "SetThreadGroupAffinity" );

            //
            // See if group affinity is supported on this system.
            // If so, calculate the #groups and #procs.
            //
            // Group affinity was introduced with Windows* 7 OS and
            // Windows* Server 2008 R2 OS.
            //
            if ( ( __kmp_GetActiveProcessorCount != NULL )
              && ( __kmp_GetActiveProcessorGroupCount != NULL )
              && ( __kmp_GetThreadGroupAffinity != NULL )
              && ( __kmp_SetThreadGroupAffinity != NULL )
              && ( ( __kmp_num_proc_groups
                = __kmp_GetActiveProcessorGroupCount() ) > 1 ) ) {
                //
                // Calculate the total number of active OS procs.
                //
                int i;

                KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );

                __kmp_xproc = 0;

                for ( i = 0; i < __kmp_num_proc_groups; i++ ) {
                    DWORD size = __kmp_GetActiveProcessorCount( i );
                    __kmp_xproc += size;
                    KA_TRACE( 20, ("__kmp_runtime_initialize: proc group %d size = %d\n", i, size ) );
                }
            }
        }
    }
    if ( __kmp_num_proc_groups <= 1 ) {
        GetSystemInfo( & info );
        __kmp_xproc = info.dwNumberOfProcessors;
    }
#else
    GetSystemInfo( & info );
    __kmp_xproc = info.dwNumberOfProcessors;
#endif // KMP_ARCH_X86_64

    //
    // If the OS said there were 0 procs, take a guess and use a value of 2.
    // This is done for Linux* OS, also. Do we need error / warning?
    //
    if ( __kmp_xproc <= 0 ) {
        __kmp_xproc = 2;
    }

    KA_TRACE( 5, ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc) );

    __kmp_str_buf_free( & path );

#if USE_ITT_BUILD
    __kmp_itt_initialize();
#endif /* USE_ITT_BUILD */

    __kmp_init_runtime = TRUE;
} // __kmp_runtime_initialize
1070 
/*
 * Tear down the Windows-specific runtime state created by
 * __kmp_runtime_initialize(): TLS key, affinity bookkeeping, the global
 * critical section, and the cached module/function pointers.
 * Safe to call when the runtime was never initialized (returns early).
 */
void
__kmp_runtime_destroy( void )
{
    if ( ! __kmp_init_runtime ) {
        return;
    }

#if USE_ITT_BUILD
    __kmp_itt_destroy();
#endif /* USE_ITT_BUILD */

    /* we can't DeleteCriticalsection( & __kmp_win32_section ); */
    /* due to the KX_TRACE() commands */
    KA_TRACE( 40, ("__kmp_runtime_destroy\n" ));

    /* Release the TLS slot used to map OS threads to gtids. */
    if( __kmp_gtid_threadprivate_key ) {
        TlsFree( __kmp_gtid_threadprivate_key );
        __kmp_gtid_threadprivate_key = 0;
    }

    __kmp_affinity_uninitialize();
    DeleteCriticalSection( & __kmp_win32_section );

    /* Drop cached ntdll.dll state (module handles obtained via
       GetModuleHandle are not reference counted, so no FreeLibrary here). */
    ntdll = NULL;
    NtQuerySystemInformation = NULL;

#if KMP_ARCH_X86_64
    /* Drop cached kernel32.dll processor-group entry points likewise. */
    kernel32 = NULL;
    __kmp_GetActiveProcessorCount = NULL;
    __kmp_GetActiveProcessorGroupCount = NULL;
    __kmp_GetThreadGroupAffinity = NULL;
    __kmp_SetThreadGroupAffinity = NULL;
#endif // KMP_ARCH_X86_64

    __kmp_init_runtime = FALSE;
}
1107 
1108 
1109 void
1110 __kmp_terminate_thread( int gtid )
1111 {
1112  kmp_info_t *th = __kmp_threads[ gtid ];
1113 
1114  if( !th ) return;
1115 
1116  KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) );
1117 
1118  if (TerminateThread( th->th.th_info.ds.ds_thread, (DWORD) -1) == FALSE) {
1119  /* It's OK, the thread may have exited already */
1120  }
1121  __kmp_free_handle( th->th.th_info.ds.ds_thread );
1122 }
1123 
1124 /* ------------------------------------------------------------------------ */
1125 /* ------------------------------------------------------------------------ */
1126 
1127 void
1128 __kmp_clear_system_time( void )
1129 {
1130  BOOL status;
1131  LARGE_INTEGER time;
1132  status = QueryPerformanceCounter( & time );
1133  __kmp_win32_time = (kmp_int64) time.QuadPart;
1134 }
1135 
1136 void
1137 __kmp_initialize_system_tick( void )
1138 {
1139  {
1140  BOOL status;
1141  LARGE_INTEGER freq;
1142 
1143  status = QueryPerformanceFrequency( & freq );
1144  if (! status) {
1145  DWORD error = GetLastError();
1146  __kmp_msg(
1147  kmp_ms_fatal,
1148  KMP_MSG( FunctionError, "QueryPerformanceFrequency()" ),
1149  KMP_ERR( error ),
1150  __kmp_msg_null
1151  );
1152 
1153  }
1154  else {
1155  __kmp_win32_tick = ((double) 1.0) / (double) freq.QuadPart;
1156  }
1157  }
1158 }
1159 
1160 /* Calculate the elapsed wall clock time for the user */
1161 
1162 void
1163 __kmp_elapsed( double *t )
1164 {
1165  BOOL status;
1166  LARGE_INTEGER now;
1167  status = QueryPerformanceCounter( & now );
1168  *t = ((double) now.QuadPart) * __kmp_win32_tick;
1169 }
1170 
1171 /* Calculate the elapsed wall clock tick for the user */
1172 
void
__kmp_elapsed_tick( double *t )
{
    /* Timer resolution in seconds, computed once by
       __kmp_initialize_system_tick(). */
    *t = __kmp_win32_tick;
}
1178 
1179 void
1180 __kmp_read_system_time( double *delta )
1181 {
1182 
1183  if (delta != NULL) {
1184  BOOL status;
1185  LARGE_INTEGER now;
1186 
1187  status = QueryPerformanceCounter( & now );
1188 
1189  *delta = ((double) (((kmp_int64) now.QuadPart) - __kmp_win32_time))
1190  * __kmp_win32_tick;
1191  }
1192 }
1193 
1194 /* ------------------------------------------------------------------------ */
1195 /* ------------------------------------------------------------------------ */
1196 
1197 /*
1198  * Change thread to the affinity mask pointed to by affin_mask argument
1199  * and return a pointer to the old value in the old_mask argument, if argument
1200  * is non-NULL.
1201  */
1202 
1203 void
1204 __kmp_change_thread_affinity_mask( int gtid, kmp_affin_mask_t *new_mask,
1205  kmp_affin_mask_t *old_mask )
1206 {
1207  kmp_info_t *th = __kmp_threads[ gtid ];
1208 
1209  KMP_DEBUG_ASSERT( *new_mask != 0 );
1210 
1211  if ( old_mask != NULL ) {
1212  *old_mask = SetThreadAffinityMask( th -> th.th_info.ds.ds_thread, *new_mask );
1213 
1214  if (! *old_mask ) {
1215  DWORD error = GetLastError();
1216  __kmp_msg(
1217  kmp_ms_fatal,
1218  KMP_MSG( CantSetThreadAffMask ),
1219  KMP_ERR( error ),
1220  __kmp_msg_null
1221  );
1222  }
1223  }
1224  if (__kmp_affinity_verbose)
1225  KMP_INFORM( ChangeAffMask, "KMP_AFFINITY (Bind)", gtid, *old_mask, *new_mask );
1226 
1227  /* Make sure old value is correct in thread data structures */
1228  KMP_DEBUG_ASSERT( old_mask != NULL && *old_mask == *(th -> th.th_affin_mask ));
1229 
1230  KMP_CPU_COPY(th -> th.th_affin_mask, new_mask);
1231 }
1232 
1233 
1234 /* ------------------------------------------------------------------------ */
1235 /* ------------------------------------------------------------------------ */
1236 
/*
 * Thread entry point for OpenMP worker threads (installed by
 * __kmp_create_worker() via CreateThread()). Publishes the gtid through
 * TLS, initializes per-thread FP state and affinity, marks the thread
 * alive for reapers, then runs the main worker loop
 * __kmp_launch_thread() until shutdown.
 */
void * __stdcall
__kmp_launch_worker( void *arg )
{
    volatile void *stack_data;   /* local whose address marks the stack base */
    void *exit_val;
    void *padding = 0;
    kmp_info_t *this_thr = (kmp_info_t *) arg;
    int gtid;

    gtid = this_thr->th.th_info.ds.ds_gtid;
    __kmp_gtid_set_specific( gtid );   /* publish gtid via the TLS key */
#ifdef KMP_TDATA_GTID
    #error "This define causes problems with LoadLibrary() + declspec(thread) " \
        "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
        "reference: http://support.microsoft.com/kb/118816"
    //__kmp_gtid = gtid;
#endif

#if USE_ITT_BUILD
    __kmp_itt_thread_name( gtid );
#endif /* USE_ITT_BUILD */

    __kmp_affinity_set_init_mask( gtid, FALSE );

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    //
    // Set the FP control regs to be a copy of
    // the parallel initialization thread's.
    //
    __kmp_clear_x87_fpu_status_word();
    __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
    __kmp_load_mxcsr( &__kmp_init_mxcsr );
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    /* Stagger each worker's stack start by gtid * __kmp_stkoffset bytes
       (the _alloca result is intentionally unused - only the stack-pointer
       shift matters). */
    if ( __kmp_stkoffset > 0 && gtid > 0 ) {
        padding = _alloca( gtid * __kmp_stkoffset );
    }

    KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
    this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
    TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );   /* reapers poll this flag */

    if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
        TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data);
        KMP_ASSERT( this_thr -> th.th_info.ds.ds_stackgrow == FALSE );
        __kmp_check_stack_overlap( this_thr );
    }
    KMP_MB();
    exit_val = __kmp_launch_thread( this_thr );   /* main worker loop */
    KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
    TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );  /* unblock reapers */
    KMP_MB();
    return exit_val;
}
1291 
1292 
1293 /* The monitor thread controls all of the threads in the complex */
1294 
/*
 * Thread entry point for the monitor thread, which owns all other threads
 * in the complex. It increments the global time counter roughly
 * __kmp_monitor_wakeups times per second until shutdown (g_done), and on
 * abnormal termination (g_abort) kills the workers and re-raises the
 * signal.
 */
void * __stdcall
__kmp_launch_monitor( void *arg )
{
    DWORD wait_status;
    kmp_thread_t monitor;
    int status;
    int interval;
    kmp_info_t *this_thr = (kmp_info_t *) arg;

    KMP_DEBUG_ASSERT(__kmp_init_monitor);
    TCW_4( __kmp_init_monitor, 2 );   // AC: Signal the library that monitor has started
                                      // TODO: hide "2" in enum (like {true,false,started})
    this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
    TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );

    KMP_MB(); /* Flush all pending memory write invalidates. */
    KA_TRACE( 10, ("__kmp_launch_monitor: launched\n" ) );

    monitor = GetCurrentThread();

    /* set thread priority - the monitor must preempt workers to keep time
       advancing even when all cores are busy */
    status = SetThreadPriority( monitor, THREAD_PRIORITY_HIGHEST );
    if (! status) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantSetThreadPriority ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }

    /* register us as monitor */
    __kmp_gtid_set_specific( KMP_GTID_MONITOR );
#ifdef KMP_TDATA_GTID
    #error "This define causes problems with LoadLibrary() + declspec(thread) " \
        "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
        "reference: http://support.microsoft.com/kb/118816"
    //__kmp_gtid = KMP_GTID_MONITOR;
#endif

#if USE_ITT_BUILD
    __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread.
#endif /* USE_ITT_BUILD */

    KMP_MB(); /* Flush all pending memory write invalidates. */

    interval = ( 1000 / __kmp_monitor_wakeups ); /* in milliseconds */

    /* Main loop: tick the global time value each interval; a signalled
       __kmp_monitor_ev (set by __kmp_reap_monitor()) ends the wait early. */
    while (! TCR_4(__kmp_global.g.g_done)) {
        /* This thread monitors the state of the system */

        KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) );

        wait_status = WaitForSingleObject( __kmp_monitor_ev, interval );

        if (wait_status == WAIT_TIMEOUT) {
            TCW_4( __kmp_global.g.g_time.dt.t_value,
                   TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 );
        }

        KMP_MB(); /* Flush all pending memory write invalidates. */
    }

    KA_TRACE( 10, ("__kmp_launch_monitor: finished\n" ) );

    status = SetThreadPriority( monitor, THREAD_PRIORITY_NORMAL );
    if (! status) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantSetThreadPriority ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }

    if (__kmp_global.g.g_abort != 0) {
        /* now we need to terminate the worker threads */
        /* the value of t_abort is the signal we caught */

        int gtid;

        KA_TRACE( 10, ("__kmp_launch_monitor: terminate sig=%d\n", (__kmp_global.g.g_abort) ) );

        /* terminate the OpenMP worker threads */
        /* TODO this is not valid for sibling threads!!
         * the uber master might not be 0 anymore.. */
        for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
            __kmp_terminate_thread( gtid );

        __kmp_cleanup();

        Sleep( 0 );   /* yield once so terminated threads can be scheduled out */

        KA_TRACE( 10, ("__kmp_launch_monitor: raise sig=%d\n", (__kmp_global.g.g_abort) ) );

        /* Re-raise the original signal so the process exits with it. */
        if (__kmp_global.g.g_abort > 0) {
            raise( __kmp_global.g.g_abort );
        }
    }

    TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );

    KMP_MB();
    return arg;
}
1402 
/*
 * Create (or, for an uber/root gtid, adopt) the OS thread for gtid.
 * For root threads the current thread's pseudo-handle is duplicated into a
 * real handle; for workers a new thread is spawned running
 * __kmp_launch_worker() with the requested stack reservation.
 */
void
__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
{
    kmp_thread_t handle;
    DWORD idThread;

    KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) );

    th->th.th_info.ds.ds_gtid = gtid;

    if ( KMP_UBER_GTID(gtid) ) {
        int stack_data;   /* local whose address seeds the stack base */

        /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for other threads to use.
           Is it appropriate to just use GetCurrentThread? When should we close this handle? When
           unregistering the root?
        */
        {
            BOOL rc;
            /* Convert the pseudo-handle into a real handle usable by other
               threads (e.g. the monitor terminating workers). */
            rc = DuplicateHandle(
                GetCurrentProcess(),
                GetCurrentThread(),
                GetCurrentProcess(),
                &th->th.th_info.ds.ds_thread,
                0,
                FALSE,
                DUPLICATE_SAME_ACCESS
            );
            KMP_ASSERT( rc );
            KA_TRACE( 10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
                           (LPVOID)th,
                           th->th.th_info.ds.ds_thread ) );
            th->th.th_info.ds.ds_thread_id = GetCurrentThreadId();
        }
        if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
            /* we will dynamically update the stack range if gtid_mode == 1 */
            TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
            TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
            TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
            __kmp_check_stack_overlap( th );
        }
    }
    else {
        KMP_MB(); /* Flush all pending memory write invalidates. */

        /* Set stack size for this thread now. */
        KA_TRACE( 10, ( "__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC
                        " bytes\n", stack_size ) );

        /* Account for the per-gtid stack-stagger consumed via _alloca()
           in __kmp_launch_worker(). */
        stack_size += gtid * __kmp_stkoffset;

        TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size);
        TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);

        KA_TRACE( 10, ( "__kmp_create_worker: (before) stack_size = %"
                        KMP_SIZE_T_SPEC
                        " bytes, &__kmp_launch_worker = %p, th = %p, "
                        "&idThread = %p\n",
                        (SIZE_T) stack_size,
                        (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
                        (LPVOID) th, &idThread ) );

        {
            /* STACK_SIZE_PARAM_IS_A_RESERVATION: stack_size is the reserve
               size, not the initial commit. */
            handle = CreateThread( NULL, (SIZE_T) stack_size,
                                   (LPTHREAD_START_ROUTINE) __kmp_launch_worker,
                                   (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
        }

        KA_TRACE( 10, ( "__kmp_create_worker: (after) stack_size = %"
                        KMP_SIZE_T_SPEC
                        " bytes, &__kmp_launch_worker = %p, th = %p, "
                        "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n",
                        (SIZE_T) stack_size,
                        (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
                        (LPVOID) th, idThread, handle ) );

        {
            if ( handle == 0 ) {
                DWORD error = GetLastError();
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG( CantCreateThread ),
                    KMP_ERR( error ),
                    __kmp_msg_null
                );
            } else {
                th->th.th_info.ds.ds_thread = handle;
            }
        }
        KMP_MB(); /* Flush all pending memory write invalidates. */
    }

    KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) );
}
1497 
1498 int
1499 __kmp_still_running(kmp_info_t *th) {
1500  return (WAIT_TIMEOUT == WaitForSingleObject( th->th.th_info.ds.ds_thread, 0));
1501 }
1502 
1503 void
1504 __kmp_create_monitor( kmp_info_t *th )
1505 {
1506  kmp_thread_t handle;
1507  DWORD idThread;
1508  int ideal, new_ideal;
1509  int caller_gtid = __kmp_get_gtid();
1510 
1511  KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) );
1512 
1513  KMP_MB(); /* Flush all pending memory write invalidates. */
1514 
1515  __kmp_monitor_ev = CreateEvent( NULL, TRUE, FALSE, NULL );
1516  if ( __kmp_monitor_ev == NULL ) {
1517  DWORD error = GetLastError();
1518  __kmp_msg(
1519  kmp_ms_fatal,
1520  KMP_MSG( CantCreateEvent ),
1521  KMP_ERR( error ),
1522  __kmp_msg_null
1523  );
1524  }; // if
1525 #if USE_ITT_BUILD
1526  __kmp_itt_system_object_created( __kmp_monitor_ev, "Event" );
1527 #endif /* USE_ITT_BUILD */
1528 
1529  th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
1530  th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
1531 
1532  // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how
1533  // to automatically expand stacksize based on CreateThread error code.
1534  if ( __kmp_monitor_stksize == 0 ) {
1535  __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
1536  }
1537  if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) {
1538  __kmp_monitor_stksize = __kmp_sys_min_stksize;
1539  }
1540 
1541  KA_TRACE( 10, ("__kmp_create_monitor: requested stacksize = %d bytes\n",
1542  (int) __kmp_monitor_stksize ) );
1543 
1544  TCW_4( __kmp_global.g.g_time.dt.t_value, 0 );
1545 
1546  handle = CreateThread( NULL, (SIZE_T) __kmp_monitor_stksize,
1547  (LPTHREAD_START_ROUTINE) __kmp_launch_monitor,
1548  (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
1549  if (handle == 0) {
1550  DWORD error = GetLastError();
1551  __kmp_msg(
1552  kmp_ms_fatal,
1553  KMP_MSG( CantCreateThread ),
1554  KMP_ERR( error ),
1555  __kmp_msg_null
1556  );
1557  }
1558  else
1559  th->th.th_info.ds.ds_thread = handle;
1560 
1561  KMP_MB(); /* Flush all pending memory write invalidates. */
1562 
1563  KA_TRACE( 10, ("__kmp_create_monitor: monitor created %p\n",
1564  (void *) th->th.th_info.ds.ds_thread ) );
1565 }
1566 
1567 /*
1568  Check to see if thread is still alive.
1569 
1570  NOTE: The ExitProcess(code) system call causes all threads to Terminate
1571  with a exit_val = code. Because of this we can not rely on
1572  exit_val having any particular value. So this routine may
1573  return STILL_ALIVE in exit_val even after the thread is dead.
1574 */
1575 
1576 int
1577 __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val )
1578 {
1579  DWORD rc;
1580  rc = GetExitCodeThread( th->th.th_info.ds.ds_thread, exit_val );
1581  if ( rc == 0 ) {
1582  DWORD error = GetLastError();
1583  __kmp_msg(
1584  kmp_ms_fatal,
1585  KMP_MSG( FunctionError, "GetExitCodeThread()" ),
1586  KMP_ERR( error ),
1587  __kmp_msg_null
1588  );
1589  }; // if
1590  return ( *exit_val == STILL_ACTIVE );
1591 }
1592 
1593 
/* Terminate only the calling thread with the given status; the process
   keeps running. */
void
__kmp_exit_thread(
    int exit_status
) {
    ExitThread( exit_status );
} // __kmp_exit_thread
1600 
1601 /*
1602  This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor().
1603 */
/*
 * Common reaping path for workers and the monitor: spin until the thread
 * either terminates at the OS level or clears its ds_alive flag, then
 * close its handle and wipe its descriptor fields.
 */
static void
__kmp_reap_common( kmp_info_t * th )
{
    DWORD exit_val;

    KMP_MB(); /* Flush all pending memory write invalidates. */

    KA_TRACE( 10, ( "__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid ) );

    /*
        2006-10-19:

        There are two opposite situations:

        1. Windows* OS keep thread alive after it resets ds_alive flag and exits from thread
           function. (For example, see C70770/Q394281 "unloading of dll based on OMP is very
           slow".)
        2. Windows* OS may kill thread before it resets ds_alive flag.

        Right solution seems to be waiting for *either* thread termination *or* ds_alive resetting.

    */

    {
        // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize KMP_WAIT_YIELD to
        // cover this usage also.
        void * obj = NULL;
        register kmp_uint32 spins;
#if USE_ITT_BUILD
        KMP_FSYNC_SPIN_INIT( obj, (void*) & th->th.th_info.ds.ds_alive );
#endif /* USE_ITT_BUILD */
        KMP_INIT_YIELD( spins );
        /* Spin with yields until either the OS reports the thread gone
           (exit code != STILL_ACTIVE) or the thread clears ds_alive. */
        do {
#if USE_ITT_BUILD
            KMP_FSYNC_SPIN_PREPARE( obj );
#endif /* USE_ITT_BUILD */
            __kmp_is_thread_alive( th, &exit_val );
            __kmp_static_delay( TRUE );
            KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
            KMP_YIELD_SPIN( spins );
        } while ( exit_val == STILL_ACTIVE && TCR_4( th->th.th_info.ds.ds_alive ) );
#if USE_ITT_BUILD
        if ( exit_val == STILL_ACTIVE ) {
            KMP_FSYNC_CANCEL( obj );
        } else {
            KMP_FSYNC_SPIN_ACQUIRED( obj );
        }; // if
#endif /* USE_ITT_BUILD */
    }

    __kmp_free_handle( th->th.th_info.ds.ds_thread );

    /*
     * NOTE: The ExitProcess(code) system call causes all threads to Terminate
     *       with a exit_val = code. Because of this we can not rely on
     *       exit_val having any particular value.
     */
    if ( exit_val == STILL_ACTIVE ) {
        KA_TRACE( 1, ( "__kmp_reap_common: thread still active.\n" ) );
    } else if ( (void *) exit_val != (void *) th) {
        /* A normally exiting thread returns its kmp_info_t* (see
           __kmp_launch_worker/__kmp_launch_monitor); anything else suggests
           an abnormal termination path. */
        KA_TRACE( 1, ( "__kmp_reap_common: ExitProcess / TerminateThread used?\n" ) );
    }; // if

    KA_TRACE( 10,
        (
            "__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC "\n",
            th->th.th_info.ds.ds_gtid,
            th->th.th_info.ds.ds_thread
        )
    );

    /* Invalidate the descriptor so stale values are never reused. */
    th->th.th_info.ds.ds_thread    = 0;
    th->th.th_info.ds.ds_tid       = KMP_GTID_DNE;
    th->th.th_info.ds.ds_gtid      = KMP_GTID_DNE;
    th->th.th_info.ds.ds_thread_id = 0;

    KMP_MB(); /* Flush all pending memory write invalidates. */
}
1682 
/*
 * Shut down and reap the monitor thread: signal its wake-up event so it
 * leaves its interval wait, then wait for it via __kmp_reap_common() and
 * release the event handle. No-op if the monitor never started.
 */
void
__kmp_reap_monitor( kmp_info_t *th )
{
    int status;

    KA_TRACE( 10, ("__kmp_reap_monitor: try to reap %p\n",
                   (void *) th->th.th_info.ds.ds_thread ) );

    // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
    // If both tid and gtid are 0, it means the monitor did not ever start.
    // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
    KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid );
    if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) {
        return;
    }; // if

    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* Wake the monitor out of WaitForSingleObject() so it can observe
       g_done and exit. */
    status = SetEvent( __kmp_monitor_ev );
    if ( status == FALSE ) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantSetEvent ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }
    KA_TRACE( 10, ( "__kmp_reap_monitor: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
    __kmp_reap_common( th );

    __kmp_free_handle( __kmp_monitor_ev );

    KMP_MB(); /* Flush all pending memory write invalidates. */
}
1718 
/* Reap a worker thread. Unlike the monitor, workers need no wake-up
   signalling - just wait for exit and release the handle. */
void
__kmp_reap_worker( kmp_info_t * th )
{
    KA_TRACE( 10, ( "__kmp_reap_worker: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
    __kmp_reap_common( th );
}
1725 
1726 /* ------------------------------------------------------------------------ */
1727 /* ------------------------------------------------------------------------ */
1728 
1729 #if KMP_HANDLE_SIGNALS
1730 
1731 
/*
 * Stage 1 signal handler: record the signal in g_abort and request global
 * shutdown via g_done. The monitor thread notices g_abort and performs the
 * actual teardown (terminate workers, re-raise the signal). Runs in
 * async-signal context, so it only does flag stores and fences.
 */
static void
__kmp_team_handler( int signo )
{
    if ( __kmp_global.g.g_abort == 0 ) {
        // Stage 1 signal handler, let's shut down all of the threads.
        if ( __kmp_debug_buf ) {
            __kmp_dump_debug_buffer();
        }; // if
        KMP_MB(); // Flush all pending memory write invalidates.
        TCW_4( __kmp_global.g.g_abort, signo );
        KMP_MB(); // Flush all pending memory write invalidates.
        TCW_4( __kmp_global.g.g_done, TRUE );
        KMP_MB(); // Flush all pending memory write invalidates.
    }
} // __kmp_team_handler
1747 
1748 
1749 
1750 static
1751 sig_func_t __kmp_signal( int signum, sig_func_t handler ) {
1752  sig_func_t old = signal( signum, handler );
1753  if ( old == SIG_ERR ) {
1754  int error = errno;
1755  __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "signal" ), KMP_ERR( error ), __kmp_msg_null );
1756  }; // if
1757  return old;
1758 }
1759 
1760 static void
1761 __kmp_install_one_handler(
1762  int sig,
1763  sig_func_t handler,
1764  int parallel_init
1765 ) {
1766  sig_func_t old;
1767  KMP_MB(); /* Flush all pending memory write invalidates. */
1768  KB_TRACE( 60, ("__kmp_install_one_handler: called: sig=%d\n", sig ) );
1769  if ( parallel_init ) {
1770  old = __kmp_signal( sig, handler );
1771  // SIG_DFL on Windows* OS in NULL or 0.
1772  if ( old == __kmp_sighldrs[ sig ] ) {
1773  __kmp_siginstalled[ sig ] = 1;
1774  } else {
1775  // Restore/keep user's handler if one previously installed.
1776  old = __kmp_signal( sig, old );
1777  }; // if
1778  } else {
1779  // Save initial/system signal handlers to see if user handlers installed.
1780  // 2009-09-23: It is a dead code. On Windows* OS __kmp_install_signals called once with
1781  // parallel_init == TRUE.
1782  old = __kmp_signal( sig, SIG_DFL );
1783  __kmp_sighldrs[ sig ] = old;
1784  __kmp_signal( sig, old );
1785  }; // if
1786  KMP_MB(); /* Flush all pending memory write invalidates. */
1787 } // __kmp_install_one_handler
1788 
1789 static void
1790 __kmp_remove_one_handler( int sig ) {
1791  if ( __kmp_siginstalled[ sig ] ) {
1792  sig_func_t old;
1793  KMP_MB(); // Flush all pending memory write invalidates.
1794  KB_TRACE( 60, ( "__kmp_remove_one_handler: called: sig=%d\n", sig ) );
1795  old = __kmp_signal( sig, __kmp_sighldrs[ sig ] );
1796  if ( old != __kmp_team_handler ) {
1797  KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) );
1798  old = __kmp_signal( sig, old );
1799  }; // if
1800  __kmp_sighldrs[ sig ] = NULL;
1801  __kmp_siginstalled[ sig ] = 0;
1802  KMP_MB(); // Flush all pending memory write invalidates.
1803  }; // if
1804 } // __kmp_remove_one_handler
1805 
1806 
1807 void
1808 __kmp_install_signals( int parallel_init )
1809 {
1810  KB_TRACE( 10, ( "__kmp_install_signals: called\n" ) );
1811  if ( ! __kmp_handle_signals ) {
1812  KB_TRACE( 10, ( "__kmp_install_signals: KMP_HANDLE_SIGNALS is false - handlers not installed\n" ) );
1813  return;
1814  }; // if
1815  __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init );
1816  __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init );
1817  __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init );
1818  __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init );
1819  __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init );
1820  __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init );
1821 } // __kmp_install_signals
1822 
1823 
1824 void
1825 __kmp_remove_signals( void )
1826 {
1827  int sig;
1828  KB_TRACE( 10, ("__kmp_remove_signals: called\n" ) );
1829  for ( sig = 1; sig < NSIG; ++ sig ) {
1830  __kmp_remove_one_handler( sig );
1831  }; // for sig
1832 } // __kmp_remove_signals
1833 
1834 
1835 #endif // KMP_HANDLE_SIGNALS
1836 
1837 /* Put the thread to sleep for a time period */
void
__kmp_thread_sleep( int millis )
{
    DWORD status;

    /* Non-alertable sleep: APCs do not interrupt it. */
    status = SleepEx( (DWORD) millis, FALSE );
    /* NOTE(review): with bAlertable == FALSE, SleepEx() returns 0 once the
       interval elapses, so nonzero is treated as an error here. SleepEx is
       not documented to set the thread's last-error code, though, so the
       GetLastError() value reported below may be stale - confirm. */
    if ( status ) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( FunctionError, "SleepEx()" ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }
}
1854 
1855 /* Determine whether the given address is mapped into the current address space. */
1856 int
1857 __kmp_is_address_mapped( void * addr )
1858 {
1859  DWORD status;
1860  MEMORY_BASIC_INFORMATION lpBuffer;
1861  SIZE_T dwLength;
1862 
1863  dwLength = sizeof(MEMORY_BASIC_INFORMATION);
1864 
1865  status = VirtualQuery( addr, &lpBuffer, dwLength );
1866 
1867  return !((( lpBuffer.State == MEM_RESERVE) || ( lpBuffer.State == MEM_FREE )) ||
1868  (( lpBuffer.Protect == PAGE_NOACCESS ) || ( lpBuffer.Protect == PAGE_EXECUTE )));
1869 }
1870 
1871 kmp_uint64
1872 __kmp_hardware_timestamp(void)
1873 {
1874  kmp_uint64 r = 0;
1875 
1876  QueryPerformanceCounter((LARGE_INTEGER*) &r);
1877  return r;
1878 }
1879 
1880 /* Free handle and check the error code */
1881 void
1882 __kmp_free_handle( kmp_thread_t tHandle )
1883 {
1884 /* called with parameter type HANDLE also, thus suppose kmp_thread_t defined as HANDLE */
1885  BOOL rc;
1886  rc = CloseHandle( tHandle );
1887  if ( !rc ) {
1888  DWORD error = GetLastError();
1889  __kmp_msg(
1890  kmp_ms_fatal,
1891  KMP_MSG( CantCloseHandle ),
1892  KMP_ERR( error ),
1893  __kmp_msg_null
1894  );
1895  }
1896 }
1897 
1898 int
1899 __kmp_get_load_balance( int max ) {
1900 
1901  static ULONG glb_buff_size = 100 * 1024;
1902 
1903  static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algortihm */
1904  static double glb_call_time = 0; /* Thread balance algorithm call time */
1905 
1906  int running_threads = 0; // Number of running threads in the system.
1907  NTSTATUS status = 0;
1908  ULONG buff_size = 0;
1909  ULONG info_size = 0;
1910  void * buffer = NULL;
1911  PSYSTEM_PROCESS_INFORMATION spi = NULL;
1912  int first_time = 1;
1913 
1914  double call_time = 0.0; //start, finish;
1915 
1916  __kmp_elapsed( & call_time );
1917 
1918  if ( glb_call_time &&
1919  ( call_time - glb_call_time < __kmp_load_balance_interval ) ) {
1920  running_threads = glb_running_threads;
1921  goto finish;
1922  }
1923  glb_call_time = call_time;
1924 
1925  // Do not spend time on running algorithm if we have a permanent error.
1926  if ( NtQuerySystemInformation == NULL ) {
1927  running_threads = -1;
1928  goto finish;
1929  }; // if
1930 
1931  if ( max <= 0 ) {
1932  max = INT_MAX;
1933  }; // if
1934 
1935  do {
1936 
1937  if ( first_time ) {
1938  buff_size = glb_buff_size;
1939  } else {
1940  buff_size = 2 * buff_size;
1941  }
1942 
1943  buffer = KMP_INTERNAL_REALLOC( buffer, buff_size );
1944  if ( buffer == NULL ) {
1945  running_threads = -1;
1946  goto finish;
1947  }; // if
1948  status = NtQuerySystemInformation( SystemProcessInformation, buffer, buff_size, & info_size );
1949  first_time = 0;
1950 
1951  } while ( status == STATUS_INFO_LENGTH_MISMATCH );
1952  glb_buff_size = buff_size;
1953 
1954  #define CHECK( cond ) \
1955  { \
1956  KMP_DEBUG_ASSERT( cond ); \
1957  if ( ! ( cond ) ) { \
1958  running_threads = -1; \
1959  goto finish; \
1960  } \
1961  }
1962 
1963  CHECK( buff_size >= info_size );
1964  spi = PSYSTEM_PROCESS_INFORMATION( buffer );
1965  for ( ; ; ) {
1966  ptrdiff_t offset = uintptr_t( spi ) - uintptr_t( buffer );
1967  CHECK( 0 <= offset && offset + sizeof( SYSTEM_PROCESS_INFORMATION ) < info_size );
1968  HANDLE pid = spi->ProcessId;
1969  ULONG num = spi->NumberOfThreads;
1970  CHECK( num >= 1 );
1971  size_t spi_size = sizeof( SYSTEM_PROCESS_INFORMATION ) + sizeof( SYSTEM_THREAD ) * ( num - 1 );
1972  CHECK( offset + spi_size < info_size ); // Make sure process info record fits the buffer.
1973  if ( spi->NextEntryOffset != 0 ) {
1974  CHECK( spi_size <= spi->NextEntryOffset ); // And do not overlap with the next record.
1975  }; // if
1976  // pid == 0 corresponds to the System Idle Process. It always has running threads
1977  // on all cores. So, we don't consider the running threads of this process.
1978  if ( pid != 0 ) {
1979  for ( int i = 0; i < num; ++ i ) {
1980  THREAD_STATE state = spi->Threads[ i ].State;
1981  // Count threads that have Ready or Running state.
1982  // !!! TODO: Why comment does not match the code???
1983  if ( state == StateRunning ) {
1984  ++ running_threads;
1985  // Stop counting running threads if the number is already greater than
1986  // the number of available cores
1987  if ( running_threads >= max ) {
1988  goto finish;
1989  }
1990  } // if
1991  }; // for i
1992  } // if
1993  if ( spi->NextEntryOffset == 0 ) {
1994  break;
1995  }; // if
1996  spi = PSYSTEM_PROCESS_INFORMATION( uintptr_t( spi ) + spi->NextEntryOffset );
1997  }; // forever
1998 
1999  #undef CHECK
2000 
2001  finish: // Clean up and exit.
2002 
2003  if ( buffer != NULL ) {
2004  KMP_INTERNAL_FREE( buffer );
2005  }; // if
2006 
2007  glb_running_threads = running_threads;
2008 
2009  return running_threads;
2010 
2011 } //__kmp_get_load_balance()
2012