Actual source code: plog.c

  2: /*
  3:       PETSc code to log object creation and destruction and PETSc events.

  5:       This provides the public API used by the rest of PETSc and by users.

  7:       These routines use a private API that is not used elsewhere in PETSc and is not 
  8:       accessible to users. The private API is defined in logimpl.h and the utils directory.

 10: */
 11: #include <petscsys.h>        /*I    "petscsys.h"   I*/
 12: #include <petsctime.h>
 13: #if defined(PETSC_HAVE_MPE)
 14: #include <mpe.h>
 15: #endif
 16: #include <stdarg.h>
 17: #include <sys/types.h>
 18: #if defined(PETSC_HAVE_STDLIB_H)
 19: #include <stdlib.h>
 20: #endif
 21: #if defined(PETSC_HAVE_MALLOC_H)
 22: #include <malloc.h>
 23: #endif
 24: #include <private/logimpl.h>

 26: PetscLogEvent  PETSC_LARGEST_EVENT  = PETSC_EVENT;

 29: std::map<std::string,PETSc::LogEvent> PETSc::Log::event_registry;
 30: std::map<std::string,PETSc::LogStage> PETSc::Log::stage_registry;
 31: #endif

 33: #if defined(PETSC_USE_LOG)
 34: #include <petscmachineinfo.h>
 35: #include <petscconfiginfo.h>

 37: /* used in the MPI_XXX() count macros in petsclog.h */

 39: /* Action and object logging variables */
 40: Action    *actions    = PETSC_NULL;
 41: Object    *objects    = PETSC_NULL;
 42: PetscBool  logActions = PETSC_FALSE;
 43: PetscBool  logObjects = PETSC_FALSE;
 44: int        numActions = 0, maxActions = 100;
 45: int        numObjects = 0, maxObjects = 100;
 46: int        numObjectsDestroyed = 0;

 48: /* Global counters */
 49: PetscLogDouble  BaseTime        = 0.0;
 50: PetscLogDouble  _TotalFlops     = 0.0; /* The number of flops */
 51: PetscLogDouble  petsc_tmp_flops = 0.0; /* The incremental number of flops */
 52: PetscLogDouble  petsc_send_ct         = 0.0; /* The number of sends */
 53: PetscLogDouble  petsc_recv_ct         = 0.0; /* The number of receives */
 54: PetscLogDouble  petsc_send_len        = 0.0; /* The total length of all sent messages */
 55: PetscLogDouble  petsc_recv_len        = 0.0; /* The total length of all received messages */
 56: PetscLogDouble  petsc_isend_ct        = 0.0; /* The number of immediate sends */
 57: PetscLogDouble  petsc_irecv_ct        = 0.0; /* The number of immediate receives */
 58: PetscLogDouble  petsc_isend_len       = 0.0; /* The total length of all immediate send messages */
 59: PetscLogDouble  petsc_irecv_len       = 0.0; /* The total length of all immediate receive messages */
 60: PetscLogDouble  petsc_wait_ct         = 0.0; /* The number of waits */
 61: PetscLogDouble  petsc_wait_any_ct     = 0.0; /* The number of anywaits */
 62: PetscLogDouble  petsc_wait_all_ct     = 0.0; /* The number of waitalls */
 63: PetscLogDouble  petsc_sum_of_waits_ct = 0.0; /* The total number of waits */
 64: PetscLogDouble  petsc_allreduce_ct    = 0.0; /* The number of reductions */
 65: PetscLogDouble  petsc_gather_ct       = 0.0; /* The number of gathers and gathervs */
 66: PetscLogDouble  petsc_scatter_ct      = 0.0; /* The number of scatters and scattervs */

 68: /* Logging functions */
 69: PetscErrorCode  (*_PetscLogPHC)(PetscObject) = PETSC_NULL;
 70: PetscErrorCode  (*_PetscLogPHD)(PetscObject) = PETSC_NULL;
 71: PetscErrorCode  (*_PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = PETSC_NULL;
 72: PetscErrorCode  (*_PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = PETSC_NULL;

 74: /* Tracing event logging variables */
 75: FILE          *tracefile       = PETSC_NULL;
 76: int            tracelevel      = 0;
 77: const char    *traceblanks     = "                                                                                                    ";
 78: char           tracespace[128] = " ";
 79: PetscLogDouble tracetime       = 0.0;
 80: static PetscBool  PetscLogBegin_PrivateCalled = PETSC_FALSE;

 82: /*---------------------------------------------- General Functions --------------------------------------------------*/
 85: /*@C
 86:   PetscLogDestroy - Destroys the object and event logging data and resets the global counters. 

 88:   Not Collective

 90:   Notes:
 91:   This routine should not usually be used by programmers. Instead employ 
 92:   PetscLogStagePush() and PetscLogStagePop().

 94:   Level: developer

 96: .keywords: log, destroy
 97: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogStagePush(), PlogStagePop()
 98: @*/
 99: PetscErrorCode  PetscLogDestroy(void)
100: {
101:   PetscStageLog       stageLog;

105:   PetscFree(actions);
106:   PetscFree(objects);
107:   PetscLogSet(PETSC_NULL, PETSC_NULL);

109:   /* Resetting phase */
110:   PetscLogGetStageLog(&stageLog);
111:   PetscStageLogDestroy(stageLog);
112:   _TotalFlops         = 0.0;
113:   numActions          = 0;
114:   numObjects          = 0;
115:   numObjectsDestroyed = 0;
116:   maxActions          = 100;
117:   maxObjects          = 100;
118:   actions    = PETSC_NULL;
119:   objects    = PETSC_NULL;
120:   logActions = PETSC_FALSE;
121:   logObjects = PETSC_FALSE;
122:   BaseTime        = 0.0;
123:   _TotalFlops     = 0.0;
124:   petsc_tmp_flops = 0.0;
125:   petsc_send_ct         = 0.0;
126:   petsc_recv_ct         = 0.0;
127:   petsc_send_len        = 0.0;
128:   petsc_recv_len        = 0.0;
129:   petsc_isend_ct        = 0.0;
130:   petsc_irecv_ct        = 0.0;
131:   petsc_isend_len       = 0.0;
132:   petsc_irecv_len       = 0.0;
133:   petsc_wait_ct         = 0.0;
134:   petsc_wait_any_ct     = 0.0;
135:   petsc_wait_all_ct     = 0.0;
136:   petsc_sum_of_waits_ct = 0.0;
137:   petsc_allreduce_ct    = 0.0;
138:   petsc_gather_ct       = 0.0;
139:   petsc_scatter_ct      = 0.0;
140:   PETSC_LARGEST_EVENT  = PETSC_EVENT;
141:   _PetscLogPHC = PETSC_NULL;
142:   _PetscLogPHD = PETSC_NULL;
143:   tracefile       = PETSC_NULL;
144:   tracelevel      = 0;
145:   traceblanks     = "                                                                                                    ";
146:   tracespace[0] = ' '; tracespace[1] = 0;
147:   tracetime       = 0.0;
148:   PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
149:   PETSC_OBJECT_CLASSID  = 0;
150:   _stageLog = 0;
151:   PetscLogBegin_PrivateCalled = PETSC_FALSE;
152:   return(0);
153: }

157: /*@C
158:   PetscLogSet - Sets the logging functions called at the beginning and ending of every event.

160:   Not Collective

162:   Input Parameters:
163: + b - The function called at beginning of event
164: - e - The function called at end of event

166:   Level: developer

168: .seealso: PetscLogDump(), PetscLogBegin(), PetscLogAllBegin(), PetscLogTraceBegin()
169: @*/
170: PetscErrorCode  PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject),
171:             PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
172: {
174:   _PetscLogPLB = b;
175:   _PetscLogPLE = e;
176:   return(0);
177: }

179: #if defined(PETSC_HAVE_CHUD)
180: #include <CHUD/CHUD.h>
181: #endif
182: #if defined(PETSC_HAVE_PAPI)
183: #include <papi.h>
184: int PAPIEventSet = PAPI_NULL;
185: #endif

187: /*------------------------------------------- Initialization Functions ----------------------------------------------*/
190: PetscErrorCode  PetscLogBegin_Private(void)
191: {
192:   int               stage;
193:   PetscBool         opt;
194:   PetscErrorCode    ierr;

197:   if (PetscLogBegin_PrivateCalled) return(0);
198:   PetscLogBegin_PrivateCalled = PETSC_TRUE;

200:   PetscOptionsHasName(PETSC_NULL, "-log_exclude_actions", &opt);
201:   if (opt) {
202:     logActions = PETSC_FALSE;
203:   }
204:   PetscOptionsHasName(PETSC_NULL, "-log_exclude_objects", &opt);
205:   if (opt) {
206:     logObjects = PETSC_FALSE;
207:   }
208:   if (logActions) {
209:     PetscMalloc(maxActions * sizeof(Action), &actions);
210:   }
211:   if (logObjects) {
212:     PetscMalloc(maxObjects * sizeof(Object), &objects);
213:   }
214:   _PetscLogPHC = PetscLogObjCreateDefault;
215:   _PetscLogPHD = PetscLogObjDestroyDefault;
216:   /* Setup default logging structures */
217:   PetscStageLogCreate(&_stageLog);
218:   PetscStageLogRegister(_stageLog, "Main Stage", &stage);
219: #if defined(PETSC_HAVE_CHUD)
220:   chudInitialize();
221:   chudAcquireSamplingFacility(CHUD_BLOCKING);
222:   chudSetSamplingDevice(chudCPU1Dev);
223:   chudSetStartDelay(0,chudNanoSeconds);
224:   chudClearPMCMode(chudCPU1Dev,chudUnused);
225:   chudClearPMCs();
226:   /* chudSetPMCMuxPosition(chudCPU1Dev,0,0); */
227:   printf("%s\n",chudGetEventName(chudCPU1Dev,PMC_1,193));
228:   printf("%s\n",chudGetEventDescription(chudCPU1Dev,PMC_1,193));
229:   printf("%s\n",chudGetEventNotes(chudCPU1Dev,PMC_1,193));
230:   chudSetPMCEvent(chudCPU1Dev,PMC_1,193);
231:   chudSetPMCMode(chudCPU1Dev,PMC_1,chudCounter);
232:   chudSetPrivilegeFilter(chudCPU1Dev,PMC_1,chudCountUserEvents);
233:   chudSetPMCEventMask(chudCPU1Dev,PMC_1,0xFE);
234:   if (!chudIsEventValid(chudCPU1Dev,PMC_1,193)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Event is not valid %d",193);
235:   chudStartPMCs();
236: #endif
237: #if defined(PETSC_HAVE_PAPI)
238:   PAPI_library_init(PAPI_VER_CURRENT);
239:   if (ierr != PAPI_VER_CURRENT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Cannot initialize PAPI");
240:   PAPI_query_event(PAPI_FP_INS);
241:   PAPI_create_eventset(&PAPIEventSet);
242:   PAPI_add_event(PAPIEventSet,PAPI_FP_INS);
243:   PAPI_start(PAPIEventSet);
244: #endif

246:   /* All processors sync here for more consistent logging */
247:   MPI_Barrier(PETSC_COMM_WORLD);
248:   PetscTime(BaseTime);
249:   PetscLogStagePush(stage);
250:   return(0);
251: }

255: /*@C
256:   PetscLogBegin - Turns on logging of objects and events. This logs flop
257:   rates and object creation and should not slow programs down too much.
258:   This routine may be called more than once.

260:   Logically Collective over PETSC_COMM_WORLD

262:   Options Database Keys:
263: + -log_summary - Prints summary of flop and timing information to the 
264:                   screen (for code compiled with PETSC_USE_LOG)
265: - -log - Prints detailed log information (for code compiled with PETSC_USE_LOG)

267:   Usage:
268: .vb
269:       PetscInitialize(...);
270:       PetscLogBegin();
271:        ... code ...
272:       PetscLogView(viewer); or PetscLogDump(); 
273:       PetscFinalize();
274: .ve

276:   Notes:
277:   PetscLogView(viewer) or PetscLogDump() actually cause the printing of 
278:   the logging information.

280:   Level: advanced

282: .keywords: log, begin
283: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin()
284: @*/
285: PetscErrorCode  PetscLogBegin(void)
286: {

290:   PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault);
291:   PetscLogBegin_Private();
292:   return(0);
293: }

297: /*@C
298:   PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 
299:   all events. This creates large log files and slows the program down.

301:   Logically Collective on PETSC_COMM_WORLD

303:   Options Database Keys:
304: . -log_all - Prints extensive log information (for code compiled with PETSC_USE_LOG)

306:   Usage:
307: .vb
308:      PetscInitialize(...);
309:      PetscLogAllBegin();
310:      ... code ...
311:      PetscLogDump(filename);
312:      PetscFinalize();
313: .ve

315:   Notes:
316:   A related routine is PetscLogBegin (with the options key -log), which is 
317:   intended for production runs since it logs only flop rates and object
318:   creation (and shouldn't significantly slow the programs).

320:   Level: advanced

322: .keywords: log, all, begin
323: .seealso: PetscLogDump(), PetscLogBegin(), PetscLogTraceBegin()
324: @*/
325: PetscErrorCode  PetscLogAllBegin(void)
326: {

330:   PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete);
331:   PetscLogBegin_Private();
332:   return(0);
333: }

337: /*@
338:   PetscLogTraceBegin - Activates trace logging.  Every time a PETSc event
339:   begins or ends, the event name is printed.

341:   Logically Collective on PETSC_COMM_WORLD

343:   Input Parameter:
344: . file - The file to print trace in (e.g. stdout)

346:   Options Database Key:
347: . -log_trace [filename] - Activates PetscLogTraceBegin()

349:   Notes:
350:   PetscLogTraceBegin() prints the processor number, the execution time (sec),
351:   then "Event begin:" or "Event end:" followed by the event name.

353:   PetscLogTraceBegin() allows tracing of all PETSc calls, which is useful
354:   to determine where a program is hanging without running in the 
355:   debugger.  Can be used in conjunction with the -info option. 

357:   Level: intermediate

359: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogBegin()
360: @*/
361: PetscErrorCode  PetscLogTraceBegin(FILE *file)
362: {

366:   tracefile = file;
367:   PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace);
368:   PetscLogBegin_Private();
369:   return(0);
370: }

374: /*@
375:   PetscLogActions - Determines whether actions are logged for the graphical viewer.

377:   Not Collective

379:   Input Parameter:
380: . flag - PETSC_TRUE if actions are to be logged

382:   Level: intermediate

384:   Note: Logging of actions continues to consume more memory as the program
385:   runs. Long running programs should consider turning this feature off.

387:   Options Database Keys:
388: . -log_exclude_actions - Turns off actions logging

390: .keywords: log, stage, register
391: .seealso: PetscLogStagePush(), PetscLogStagePop()
392: @*/
393: PetscErrorCode  PetscLogActions(PetscBool  flag)
394: {
396:   logActions = flag;
397:   return(0);
398: }

402: /*@
403:   PetscLogObjects - Determines whether objects are logged for the graphical viewer.

405:   Not Collective

407:   Input Parameter:
408: . flag - PETSC_TRUE if objects are to be logged

410:   Level: intermediate

412:   Note: Logging of objects continues to consume more memory as the program
413:   runs. Long running programs should consider turning this feature off.

415:   Options Database Keys:
416: . -log_exclude_objects - Turns off objects logging

418: .keywords: log, stage, register
419: .seealso: PetscLogStagePush(), PetscLogStagePop()
420: @*/
421: PetscErrorCode  PetscLogObjects(PetscBool  flag)
422: {
424:   logObjects = flag;
425:   return(0);
426: }

428: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
431: /*@C
432:   PetscLogStageRegister - Attaches a charactor string name to a logging stage.

434:   Not Collective

436:   Input Parameter:
437: . sname - The name to associate with that stage

439:   Output Parameter:
440: . stage - The stage number

442:   Level: intermediate

444: .keywords: log, stage, register
445: .seealso: PetscLogStagePush(), PetscLogStagePop()
446: @*/
447: PetscErrorCode  PetscLogStageRegister(const char sname[],PetscLogStage *stage)
448: {
449:   PetscStageLog  stageLog;
450:   PetscLogEvent  event;

454:   PetscLogGetStageLog(&stageLog);
455:   PetscStageLogRegister(stageLog, sname, stage);
456:   /* Copy events already changed in the main stage, this sucks */
457:   EventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents);
458:   for (event = 0; event < stageLog->eventLog->numEvents; event++) {
459:     EventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event],&stageLog->stageInfo[*stage].eventLog->eventInfo[event]);
460:   }
461:   ClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses);
462:   return(0);
463: }

467: /*@C
468:   PetscLogStagePush - This function pushes a stage on the stack.

470:   Not Collective

472:   Input Parameter:
473: . stage - The stage on which to log

475:   Usage:
476:   If the option -log_sumary is used to run the program containing the 
477:   following code, then 2 sets of summary data will be printed during
478:   PetscFinalize().
479: .vb
480:       PetscInitialize(int *argc,char ***args,0,0);
481:       [stage 0 of code]   
482:       PetscLogStagePush(1);
483:       [stage 1 of code]
484:       PetscLogStagePop();
485:       PetscBarrier(...);
486:       [more stage 0 of code]   
487:       PetscFinalize();
488: .ve
489:  
490:   Notes:
491:   Use PetscLogStageRegister() to register a stage.

493:   Level: intermediate

495: .keywords: log, push, stage
496: .seealso: PetscLogStagePop(), PetscLogStageRegister(), PetscBarrier()
497: @*/
498: PetscErrorCode  PetscLogStagePush(PetscLogStage stage)
499: {
500:   PetscStageLog  stageLog;

504:   PetscLogGetStageLog(&stageLog);
505:   PetscStageLogPush(stageLog, stage);
506:   return(0);
507: }

511: /*@C
512:   PetscLogStagePop - This function pops a stage from the stack.

514:   Not Collective

516:   Usage:
517:   If the option -log_sumary is used to run the program containing the 
518:   following code, then 2 sets of summary data will be printed during
519:   PetscFinalize().
520: .vb
521:       PetscInitialize(int *argc,char ***args,0,0);
522:       [stage 0 of code]   
523:       PetscLogStagePush(1);
524:       [stage 1 of code]
525:       PetscLogStagePop();
526:       PetscBarrier(...);
527:       [more stage 0 of code]   
528:       PetscFinalize();
529: .ve

531:   Notes:  
532:   Use PetscLogStageRegister() to register a stage.

534:   Level: intermediate

536: .keywords: log, pop, stage
537: .seealso: PetscLogStagePush(), PetscLogStageRegister(), PetscBarrier()
538: @*/
539: PetscErrorCode  PetscLogStagePop(void)
540: {
541:   PetscStageLog  stageLog;

545:   PetscLogGetStageLog(&stageLog);
546:   PetscStageLogPop(stageLog);
547:   return(0);
548: }

552: /*@
553:   PetscLogStageSetActive - Determines stage activity for PetscLogEventBegin() and PetscLogEventEnd().

555:   Not Collective 

557:   Input Parameters:
558: + stage    - The stage
559: - isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)

561:   Level: intermediate

563: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
564: @*/
565: PetscErrorCode  PetscLogStageSetActive(PetscLogStage stage, PetscBool  isActive)
566: {
567:   PetscStageLog  stageLog;

571:   PetscLogGetStageLog(&stageLog);
572:   PetscStageLogSetActive(stageLog, stage, isActive);
573:   return(0);
574: }

578: /*@
579:   PetscLogStageGetActive - Returns stage activity for PetscLogEventBegin() and PetscLogEventEnd().

581:   Not Collective 

583:   Input Parameter:
584: . stage    - The stage

586:   Output Parameter:
587: . isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)

589:   Level: intermediate

591: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
592: @*/
593: PetscErrorCode  PetscLogStageGetActive(PetscLogStage stage, PetscBool  *isActive)
594: {
595:   PetscStageLog  stageLog;

599:   PetscLogGetStageLog(&stageLog);
600:   PetscStageLogGetActive(stageLog, stage, isActive);
601:   return(0);
602: }

606: /*@
607:   PetscLogStageSetVisible - Determines stage visibility in PetscLogView()

609:   Not Collective 

611:   Input Parameters:
612: + stage     - The stage
613: - isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)

615:   Level: intermediate

617: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogView()
618: @*/
619: PetscErrorCode  PetscLogStageSetVisible(PetscLogStage stage, PetscBool  isVisible)
620: {
621:   PetscStageLog  stageLog;

625:   PetscLogGetStageLog(&stageLog);
626:   PetscStageLogSetVisible(stageLog, stage, isVisible);
627:   return(0);
628: }

632: /*@
633:   PetscLogStageGetVisible - Returns stage visibility in PetscLogView()

635:   Not Collective 

637:   Input Parameter:
638: . stage     - The stage

640:   Output Parameter:
641: . isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)

643:   Level: intermediate

645: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogView()
646: @*/
647: PetscErrorCode  PetscLogStageGetVisible(PetscLogStage stage, PetscBool  *isVisible)
648: {
649:   PetscStageLog  stageLog;

653:   PetscLogGetStageLog(&stageLog);
654:   PetscStageLogGetVisible(stageLog, stage, isVisible);
655:   return(0);
656: }

660: /*@C
661:   PetscLogStageGetId - Returns the stage id when given the stage name.

663:   Not Collective 

665:   Input Parameter:
666: . name  - The stage name

668:   Output Parameter:
669: . stage - The stage

671:   Level: intermediate

673: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
674: @*/
675: PetscErrorCode  PetscLogStageGetId(const char name[], PetscLogStage *stage)
676: {
677:   PetscStageLog  stageLog;

681:   PetscLogGetStageLog(&stageLog);
682:   PetscStageLogGetStage(stageLog, name, stage);
683:   return(0);
684: }

686: /*------------------------------------------------ Event Functions --------------------------------------------------*/
689: /*@C
690:   PetscLogEventRegister - Registers an event name for logging operations in an application code. 

692:   Not Collective

694:   Input Parameter:
695: + name   - The name associated with the event
696: - classid - The classid associated to the class for this event, obtain either with
697:            PetscClassIdRegister() or use a predefined one such as KSP_CLASSID, SNES_CLASSID, the predefined ones 
698:            are only available in C code
699:             
700:   Output Parameter:
701: . event - The event id for use with PetscLogEventBegin() and PetscLogEventEnd().

703:   Example of Usage:
704: .vb
705:       PetscLogEvent USER_EVENT;
706:       PetscClassId classid;
707:       PetscLogDouble user_event_flops;
708:       PetscClassIdRegister("class name",&classid);
709:       PetscLogEventRegister("User event name",classid,&USER_EVENT);
710:       PetscLogEventBegin(USER_EVENT,0,0,0,0);
711:          [code segment to monitor]
712:          PetscLogFlops(user_event_flops);
713:       PetscLogEventEnd(USER_EVENT,0,0,0,0);
714: .ve

716:   Notes: 
717:   PETSc automatically logs library events if the code has been
718:   compiled with -DPETSC_USE_LOG (which is the default) and -log,
719:   -log_summary, or -log_all are specified.  PetscLogEventRegister() is
720:   intended for logging user events to supplement this PETSc
721:   information. 

723:   PETSc can gather data for use with the utilities Upshot/Nupshot
724:   (part of the MPICH distribution).  If PETSc has been compiled
725:   with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
726:   MPICH), the user can employ another command line option, -log_mpe,
727:   to create a logfile, "mpe.log", which can be visualized
728:   Upshot/Nupshot. 

730:   The classid is associated with each event so that classes of events
731:   can be disabled simultaneously, such as all matrix events. The user
732:   can either use an existing classid, such as MAT_CLASSID, or create
733:   their own as shown in the example.

735:   Level: intermediate

737: .keywords: log, event, register
738: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogFlops(),
739:           PetscLogEventMPEActivate(), PetscLogEventMPEDeactivate(),
740:           PetscLogEventActivate(), PetscLogEventDeactivate(), PetscClassIdRegister()
741: @*/
742: PetscErrorCode  PetscLogEventRegister(const char name[],PetscClassId classid,PetscLogEvent *event)
743: {
744:   PetscStageLog  stageLog;
745:   int            stage;

749:   *event = PETSC_DECIDE;
750:   PetscLogGetStageLog(&stageLog);
751:   EventRegLogRegister(stageLog->eventLog, name, classid, event);
752:   for (stage = 0; stage < stageLog->numStages; stage++) {
753:     EventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents);
754:     ClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
755:   }
756:   return(0);
757: }

761: /*@
762:   PetscLogEventActivate - Indicates that a particular event should be logged.

764:   Not Collective

766:   Input Parameter:
767: . event - The event id

769:   Usage:
770: .vb
771:       PetscLogEventDeactivate(VEC_SetValues);
772:         [code where you do not want to log VecSetValues()]
773:       PetscLogEventActivate(VEC_SetValues);
774:         [code where you do want to log VecSetValues()]
775: .ve 

777:   Note:
778:   The event may be either a pre-defined PETSc event (found in include/petsclog.h)
779:   or an event number obtained with PetscLogEventRegister().

781:   Level: advanced

783: .keywords: log, event, activate
784: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PlogEventDeactivate()
785: @*/
786: PetscErrorCode  PetscLogEventActivate(PetscLogEvent event)
787: {
788:   PetscStageLog  stageLog;
789:   int            stage;

793:   PetscLogGetStageLog(&stageLog);
794:   PetscStageLogGetCurrent(stageLog, &stage);
795:   EventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
796:   return(0);
797: }

801: /*@
802:   PetscLogEventDeactivate - Indicates that a particular event should not be logged. 

804:   Not Collective

806:   Input Parameter:
807: . event - The event id

809:   Usage:
810: .vb
811:       PetscLogEventDeactivate(VEC_SetValues);
812:         [code where you do not want to log VecSetValues()]
813:       PetscLogEventActivate(VEC_SetValues);
814:         [code where you do want to log VecSetValues()]
815: .ve 

817:   Note: 
818:   The event may be either a pre-defined PETSc event (found in
819:   include/petsclog.h) or an event number obtained with PetscLogEventRegister()).

821:   Level: advanced

823: .keywords: log, event, deactivate
824: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PlogEventActivate()
825: @*/
826: PetscErrorCode  PetscLogEventDeactivate(PetscLogEvent event)
827: {
828:   PetscStageLog  stageLog;
829:   int            stage;

833:   PetscLogGetStageLog(&stageLog);
834:   PetscStageLogGetCurrent(stageLog, &stage);
835:   EventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
836:   return(0);
837: }

841: /*@
842:   PetscLogEventSetActiveAll - Sets the event activity in every stage.

844:   Not Collective

846:   Input Parameters:
847: + event    - The event id
848: - isActive - The activity flag determining whether the event is logged

850:   Level: advanced

852: .keywords: log, event, activate
853: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PlogEventActivate(),PlogEventDeactivate()
854: @*/
855: PetscErrorCode  PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool  isActive)
856: {
857:   PetscStageLog  stageLog;
858:   int            stage;

862:   PetscLogGetStageLog(&stageLog);
863:   for(stage = 0; stage < stageLog->numStages; stage++) {
864:     if (isActive) {
865:       EventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
866:     } else {
867:       EventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
868:     }
869:   }
870:   return(0);
871: }

875: /*@
876:   PetscLogEventActivateClass - Activates event logging for a PETSc object class.

878:   Not Collective

880:   Input Parameter:
881: . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc.

883:   Level: developer

885: .keywords: log, event, activate, class
886: .seealso: PetscInfoActivate(),PetscInfo(),PetscInfoAllow(),PetscLogEventDeactivateClass(), PetscLogEventActivate(),PetscLogEventDeactivate()
887: @*/
888: PetscErrorCode  PetscLogEventActivateClass(PetscClassId classid)
889: {
890:   PetscStageLog  stageLog;
891:   int            stage;

895:   PetscLogGetStageLog(&stageLog);
896:   PetscStageLogGetCurrent(stageLog, &stage);
897:   EventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
898:   return(0);
899: }

903: /*@
904:   PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class.

906:   Not Collective

908:   Input Parameter:
909: . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc.

911:   Level: developer

913: .keywords: log, event, deactivate, class
914: .seealso: PetscInfoActivate(),PetscInfo(),PetscInfoAllow(),PetscLogEventActivateClass(), PetscLogEventActivate(),PetscLogEventDeactivate()
915: @*/
916: PetscErrorCode  PetscLogEventDeactivateClass(PetscClassId classid)
917: {
918:   PetscStageLog  stageLog;
919:   int            stage;

923:   PetscLogGetStageLog(&stageLog);
924:   PetscStageLogGetCurrent(stageLog, &stage);
925:   EventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
926:   return(0);
927: }

929: /*MC
930:    PetscLogEventBegin - Logs the beginning of a user event. 

932:    Synopsis:
933:    PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,
934:                        PetscObject o4)

936:    Not Collective

938:    Input Parameters:
939: +  e - integer associated with the event obtained from PetscLogEventRegister()
940: -  o1,o2,o3,o4 - objects associated with the event, or 0


943:    Fortran Synopsis:
944:    void PetscLogEventBegin(int e,PetscErrorCode ierr)

946:    Usage:
947: .vb
948:      PetscLogEvent USER_EVENT;
949:      PetscLogDouble user_event_flops;
950:      PetscLogEventRegister("User event",0,&USER_EVENT);
951:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
952:         [code segment to monitor]
953:         PetscLogFlops(user_event_flops);
954:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
955: .ve

957:    Notes:
958:    You need to register each integer event with the command 
959:    PetscLogEventRegister().  The source code must be compiled with 
960:    -DPETSC_USE_LOG, which is the default.

962:    PETSc automatically logs library events if the code has been
963:    compiled with -DPETSC_USE_LOG, and -log, -log_summary, or -log_all are
964:    specified.  PetscLogEventBegin() is intended for logging user events
965:    to supplement this PETSc information.

967:    Level: intermediate

969: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops()

971: .keywords: log, event, begin
972: M*/

974: /*MC
975:    PetscLogEventEnd - Log the end of a user event.

977:    Synopsis:
978:    PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,
979:                      PetscObject o4)

981:    Not Collective

983:    Input Parameters:
984: +  e - integer associated with the event obtained with PetscLogEventRegister()
985: -  o1,o2,o3,o4 - objects associated with the event, or 0


988:    Fortran Synopsis:
989:    void PetscLogEventEnd(int e,PetscErrorCode ierr)

991:    Usage:
992: .vb
993:      PetscLogEvent USER_EVENT;
994:      PetscLogDouble user_event_flops;
995:      PetscLogEventRegister("User event",0,&USER_EVENT,);
996:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
997:         [code segment to monitor]
998:         PetscLogFlops(user_event_flops);
999:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1000: .ve

1002:    Notes:
1003:    You should also register each additional integer event with the command 
1004:    PetscLogEventRegister(). Source code must be compiled with 
1005:    -DPETSC_USE_LOG, which is the default.

1007:    PETSc automatically logs library events if the code has been
1008:    compiled with -DPETSC_USE_LOG, and -log, -log_summary, or -log_all are
1009:    specified.  PetscLogEventEnd() is intended for logging user events
1010:    to supplement this PETSc information.

1012:    Level: intermediate

1014: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogFlops()

1016: .keywords: log, event, end
1017: M*/

1019: /*MC
1020:    PetscLogEventBarrierBegin - Logs the time in a barrier before an event.

1022:    Synopsis:
1023:    PetscErrorCode PetscLogEventBarrierBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,
1024:                   PetscObject o4,MPI_Comm comm)

1026:    Not Collective

1028:    Input Parameters:
1029: .  e - integer associated with the event obtained from PetscLogEventRegister()
1030: .  o1,o2,o3,o4 - objects associated with the event, or 0
1031: .  comm - communicator the barrier takes place over


1034:    Usage:
1035: .vb
1036:      PetscLogEventBarrierBegin(VEC_NormBarrier,0,0,0,0,comm);
1037:        MPI_Allreduce()
1038:      PetscLogEventBarrierEnd(VEC_NormBarrier,0,0,0,0,comm);
1039: .ve

1041:    Notes:
1042:    This is for logging the amount of time spent in a barrier for an event
1043:    that requires synchronization. 

1045:    Additional Notes:
1046:    Synchronization events always come in pairs; for example, VEC_NormBarrier and 
1047:    VEC_NormComm = VEC_NormBarrier + 1

1049:    Level: advanced

1051: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops(), PetscLogEventBegin(),
1052:           PetscLogEventBarrierEnd()

1054: .keywords: log, event, begin, barrier
1055: M*/

1057: /*MC
1058:    PetscLogEventBarrierEnd - Logs the time in a barrier before an event.

1060:    Synopsis:
1061:    PetscErrorCode PetscLogEventBarrierEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,
1062:                   PetscObject o4,MPI_Comm comm)

1064:    Logically Collective on MPI_Comm

1066:    Input Parameters:
1067: .  e - integer associated with the event obtained from PetscLogEventRegister()
1068: .  o1,o2,o3,o4 - objects associated with the event, or 0
1069: .  comm - communicator the barrier takes place over


1072:     Usage:
1073: .vb
1074:      PetscLogEventBarrierBegin(VEC_NormBarrier,0,0,0,0,comm);
1075:        MPI_Allreduce()
1076:      PetscLogEventBarrierEnd(VEC_NormBarrier,0,0,0,0,comm);
1077: .ve

1079:    Notes:
1080:    This is for logging the amount of time spent in a barrier for an event
1081:    that requires synchronization. 

1083:    Additional Notes:
1084:    Synchronization events always come in pairs; for example, VEC_NormBarrier and 
1085:    VEC_NormComm = VEC_NormBarrier + 1

1087:    Level: advanced

1089: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops(), PetscLogEventBegin(),
1090:           PetscLogEventBarrierBegin()

1092: .keywords: log, event, begin, barrier
1093: M*/

1097: /*@C
1098:   PetscLogEventGetId - Returns the event id when given the event name.

1100:   Not Collective 

1102:   Input Parameter:
1103: . name  - The event name

1105:   Output Parameter:
1106: . event - The event

1108:   Level: intermediate

1110: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogStageGetId()
1111: @*/
1112: PetscErrorCode  PetscLogEventGetId(const char name[], PetscLogEvent *event)
1113: {
1114:   PetscStageLog  stageLog;

1118:   PetscLogGetStageLog(&stageLog);
1119:   EventRegLogGetEvent(stageLog->eventLog, name, event);
1120:   return(0);
1121: }


1124: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1127: /*@C
1128:   PetscLogDump - Dumps logs of objects to a file. This file is intended to 
1129:   be read by bin/petscview. This program no longer exists.

1131:   Collective on PETSC_COMM_WORLD

1133:   Input Parameter:
1134: . name - an optional file name

1136:   Options Database Keys:
1137: + -log     - Prints basic log information (for code compiled with PETSC_USE_LOG)
1138: - -log_all - Prints extensive log information (for code compiled with PETSC_USE_LOG)
1139:    
1140:   Usage:
1141: .vb
1142:      PetscInitialize(...);
1143:      PetscLogBegin(); or PetscLogAllBegin(); 
1144:      ... code ...
1145:      PetscLogDump(filename);
1146:      PetscFinalize();
1147: .ve

1149:   Notes:
1150:   The default file name is 
1151: $    Log.<rank>
1152:   where <rank> is the processor number. If no name is specified, 
1153:   this file will be used.

1155:   Level: advanced

1157: .keywords: log, dump
1158: .seealso: PetscLogBegin(), PetscLogAllBegin(), PetscLogView()
1159: @*/
1160: PetscErrorCode  PetscLogDump(const char sname[])
1161: {
1162:   PetscStageLog      stageLog;
1163:   PetscEventPerfInfo *eventInfo;
1164:   FILE               *fd;
1165:   char               file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1166:   PetscLogDouble     flops, _TotalTime;
1167:   PetscMPIInt        rank;
1168:   int                action, object, curStage;
1169:   PetscLogEvent      event;
1170:   PetscErrorCode     ierr;

1173:   /* Calculate the total elapsed time */
1174:   PetscTime(_TotalTime);
1175:   _TotalTime -= BaseTime;
1176:   /* Open log file */
1177:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
1178:   if (sname) {
1179:     sprintf(file, "%s.%d", sname, rank);
1180:   } else {
1181:     sprintf(file, "Log.%d", rank);
1182:   }
1183:   PetscFixFilename(file, fname);
1184:   PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd);
1185:   if ((!rank) && (!fd)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
1186:   /* Output totals */
1187:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flops %14e %16.8e\n", _TotalFlops, _TotalTime);
1188:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0);
1189:   /* Output actions */
1190:   if (logActions) {
1191:     PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", numActions);
1192:     for(action = 0; action < numActions; action++) {
1193:       PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n",
1194:                           actions[action].time, actions[action].action, (int)actions[action].event, (int)actions[action].classid, actions[action].id1,
1195:                           actions[action].id2, actions[action].id3, actions[action].flops, actions[action].mem, actions[action].maxmem);
1196:     }
1197:   }
1198:   /* Output objects */
1199:   if (logObjects) {
1200:     PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", numObjects, numObjectsDestroyed);
1201:     for(object = 0; object < numObjects; object++) {
1202:       PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", objects[object].parent, (int) objects[object].mem);
1203:       if (!objects[object].name[0]) {
1204:         PetscFPrintf(PETSC_COMM_WORLD, fd,"No Name\n");
1205:       } else {
1206:         PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", objects[object].name);
1207:       }
1208:       if (objects[object].info[0] != 0) {
1209:         PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n");
1210:       } else {
1211:         PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", objects[object].info);
1212:       }
1213:     }
1214:   }
1215:   /* Output events */
1216:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n");
1217:   PetscLogGetStageLog(&stageLog);
1218:   PetscIntStackTop(stageLog->stack, &curStage);
1219:   eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1220:   for(event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1221:     if (eventInfo[event].time != 0.0) {
1222:       flops = eventInfo[event].flops/eventInfo[event].time;
1223:     } else {
1224:       flops = 0.0;
1225:     }
1226:     PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count,
1227:                         eventInfo[event].flops, eventInfo[event].time, flops);
1228:   }
1229:   PetscFClose(PETSC_COMM_WORLD, fd);
1230:   return(0);
1231: }

1235: /*@C
1236:   PetscLogViewer - Prints a summary of the logging.

1238:   Collective over MPI_Comm

1240:   Input Parameter:
1241: .  viewer - an ASCII viewer

1243:   Options Database Keys:
1244: . -log_summary - Prints summary of log information (for code compiled with PETSC_USE_LOG)

1246:   Usage:
1247: .vb
1248:      PetscInitialize(...);
1249:      PetscLogBegin();
1250:      ... code ...
1251:      PetscLogView(PetscViewer);
1252:      PetscFinalize(...);
1253: .ve

1255:   Notes:
1256:   By default the summary is printed to stdout.

1258:   Level: beginner
1259:    
1260: .keywords: log, dump, print
1261: .seealso: PetscLogBegin(), PetscLogDump()
1262: @*/
1263: PetscErrorCode  PetscLogView(PetscViewer viewer)
1264: {
1265:   FILE               *fd;
1266:   PetscLogDouble     zero       = 0.0;
1267:   PetscStageLog      stageLog;
1268:   PetscStageInfo     *stageInfo = PETSC_NULL;
1269:   PetscEventPerfInfo *eventInfo = PETSC_NULL;
1270:   PetscClassPerfInfo *classInfo;
1271:   char               arch[10], hostname[64], username[16], pname[PETSC_MAX_PATH_LEN], date[64];
1272:   const char         *name;
1273:   PetscLogDouble     locTotalTime, TotalTime, TotalFlops;
1274:   PetscLogDouble     numMessages, messageLength, avgMessLen, numReductions;
1275:   PetscLogDouble     stageTime, flops, flopr, mem, mess, messLen, red;
1276:   PetscLogDouble     fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1277:   PetscLogDouble     fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1278:   PetscLogDouble     min, max, tot, ratio, avg, x, y;
1279:   PetscLogDouble     minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratCt, totm, totml, totr;
1280:   PetscMPIInt        minCt, maxCt;
1281:   PetscMPIInt        size, rank;
1282:   PetscBool          *localStageUsed,    *stageUsed;
1283:   PetscBool          *localStageVisible, *stageVisible;
1284:   int                numStages, localNumEvents, numEvents;
1285:   int                stage, lastStage, oclass;
1286:   PetscLogEvent      event;
1287:   PetscErrorCode     ierr;
1288:   char               version[256];
1289:   MPI_Comm           comm;

1292:   PetscObjectGetComm((PetscObject)viewer,&comm);
1293:   PetscViewerASCIIGetPointer(viewer,&fd);
1294:   MPI_Comm_size(comm, &size);
1295:   MPI_Comm_rank(comm, &rank);
1296:   /* Pop off any stages the user forgot to remove */
1297:   lastStage = 0;
1298:   PetscLogGetStageLog(&stageLog);
1299:   PetscStageLogGetCurrent(stageLog, &stage);
1300:   while (stage >= 0) {
1301:     lastStage = stage;
1302:     PetscStageLogPop(stageLog);
1303:     PetscStageLogGetCurrent(stageLog, &stage);
1304:   }
1305:   /* Get the total elapsed time */
1306:   PetscTime(locTotalTime);  locTotalTime -= BaseTime;

1308:   PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1309:   PetscFPrintf(comm, fd, "***             WIDEN YOUR WINDOW TO 120 CHARACTERS.  Use 'enscript -r -fCourier9' to print this document            ***\n");
1310:   PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1311:   PetscFPrintf(comm, fd, "\n---------------------------------------------- PETSc Performance Summary: ----------------------------------------------\n\n");
1312:   PetscGetArchType(arch, 10);
1313:   PetscGetHostName(hostname, 64);
1314:   PetscGetUserName(username, 16);
1315:   PetscGetProgramName(pname, PETSC_MAX_PATH_LEN);
1316:   PetscGetDate(date, 64);
1317:   PetscGetVersion(version,256);
1318:   if (size == 1) {
1319:     PetscFPrintf(comm,fd,"%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date);
1320:   } else {
1321:     PetscFPrintf(comm,fd,"%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date);
1322:   }
1323:   PetscFPrintf(comm, fd, "Using %s\n", version);

1325:   /* Must preserve reduction count before we go on */
1326:   red  = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;

1328:   /* Calculate summary information */
1329:   PetscFPrintf(comm, fd, "\n                         Max       Max/Min        Avg      Total \n");
1330:   /*   Time */
1331:   MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1332:   MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1333:   MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1334:   avg  = (tot)/((PetscLogDouble) size);
1335:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1336:   PetscFPrintf(comm, fd, "Time (sec):           %5.3e   %10.5f   %5.3e\n", max, ratio, avg);
1337:   TotalTime = tot;
1338:   /*   Objects */
1339:   avg  = (PetscLogDouble) numObjects;
1340:   MPI_Allreduce(&avg,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1341:   MPI_Allreduce(&avg,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1342:   MPI_Allreduce(&avg,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1343:   avg  = (tot)/((PetscLogDouble) size);
1344:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1345:   PetscFPrintf(comm, fd, "Objects:              %5.3e   %10.5f   %5.3e\n", max, ratio, avg);
1346:   /*   Flops */
1347:   MPI_Allreduce(&_TotalFlops,  &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1348:   MPI_Allreduce(&_TotalFlops,  &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1349:   MPI_Allreduce(&_TotalFlops,  &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1350:   avg  = (tot)/((PetscLogDouble) size);
1351:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1352:   PetscFPrintf(comm, fd, "Flops:                %5.3e   %10.5f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1353:   TotalFlops = tot;
1354:   /*   Flops/sec -- Must talk to Barry here */
1355:   if (locTotalTime != 0.0) flops = _TotalFlops/locTotalTime; else flops = 0.0;
1356:   MPI_Allreduce(&flops,        &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1357:   MPI_Allreduce(&flops,        &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1358:   MPI_Allreduce(&flops,        &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1359:   avg  = (tot)/((PetscLogDouble) size);
1360:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1361:   PetscFPrintf(comm, fd, "Flops/sec:            %5.3e   %10.5f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1362:   /*   Memory */
1363:   PetscMallocGetMaximumUsage(&mem);
1364:   if (mem > 0.0) {
1365:     MPI_Allreduce(&mem,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1366:     MPI_Allreduce(&mem,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1367:     MPI_Allreduce(&mem,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1368:     avg  = (tot)/((PetscLogDouble) size);
1369:     if (min != 0.0) ratio = max/min; else ratio = 0.0;
1370:     PetscFPrintf(comm, fd, "Memory:               %5.3e   %10.5f              %5.3e\n", max, ratio, tot);
1371:   }
1372:   /*   Messages */
1373:   mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1374:   MPI_Allreduce(&mess,         &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1375:   MPI_Allreduce(&mess,         &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1376:   MPI_Allreduce(&mess,         &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1377:   avg  = (tot)/((PetscLogDouble) size);
1378:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1379:   PetscFPrintf(comm, fd, "MPI Messages:         %5.3e   %10.5f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1380:   numMessages = tot;
1381:   /*   Message Lengths */
1382:   mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1383:   MPI_Allreduce(&mess,         &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1384:   MPI_Allreduce(&mess,         &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1385:   MPI_Allreduce(&mess,         &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1386:   if (numMessages != 0) avg = (tot)/(numMessages); else avg = 0.0;
1387:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1388:   PetscFPrintf(comm, fd, "MPI Message Lengths:  %5.3e   %10.5f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1389:   messageLength = tot;
1390:   /*   Reductions */
1391:   MPI_Allreduce(&red,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1392:   MPI_Allreduce(&red,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1393:   MPI_Allreduce(&red,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1394:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1395:   PetscFPrintf(comm, fd, "MPI Reductions:       %5.3e   %10.5f\n", max, ratio);
1396:   numReductions = red; /* wrong because uses count from process zero */
1397:   PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n");
1398:   PetscFPrintf(comm, fd, "                            e.g., VecAXPY() for real vectors of length N --> 2N flops\n");
1399:   PetscFPrintf(comm, fd, "                            and VecAXPY() for complex vectors of length N --> 8N flops\n");

1401:   /* Get total number of stages --
1402:        Currently, a single processor can register more stages than another, but stages must all be registered in order.
1403:        We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
1404:        This seems best accomplished by assoicating a communicator with each stage.
1405:   */
1406:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1407:   PetscMalloc(numStages * sizeof(PetscBool), &localStageUsed);
1408:   PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
1409:   PetscMalloc(numStages * sizeof(PetscBool), &localStageVisible);
1410:   PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
1411:   if (numStages > 0) {
1412:     stageInfo = stageLog->stageInfo;
1413:     for(stage = 0; stage < numStages; stage++) {
1414:       if (stage < stageLog->numStages) {
1415:         localStageUsed[stage]    = stageInfo[stage].used;
1416:         localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1417:       } else {
1418:         localStageUsed[stage]    = PETSC_FALSE;
1419:         localStageVisible[stage] = PETSC_TRUE;
1420:       }
1421:     }
1422:     MPI_Allreduce(localStageUsed,    stageUsed,    numStages, MPI_INT, MPI_LOR,  comm);
1423:     MPI_Allreduce(localStageVisible, stageVisible, numStages, MPI_INT, MPI_LAND, comm);
1424:     for(stage = 0; stage < numStages; stage++) {
1425:       if (stageUsed[stage]) {
1426:         PetscFPrintf(comm, fd, "\nSummary of Stages:   ----- Time ------  ----- Flops -----  --- Messages ---  -- Message Lengths --  -- Reductions --\n");
1427:         PetscFPrintf(comm, fd, "                        Avg     %%Total     Avg     %%Total   counts   %%Total     Avg         %%Total   counts   %%Total \n");
1428:         break;
1429:       }
1430:     }
1431:     for(stage = 0; stage < numStages; stage++) {
1432:       if (!stageUsed[stage]) continue;
1433:       if (localStageUsed[stage]) {
1434:         MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1435:         MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1436:         MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1437:         MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1438:         MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1439:         name = stageInfo[stage].name;
1440:       } else {
1441:         MPI_Allreduce(&zero,                           &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1442:         MPI_Allreduce(&zero,                           &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1443:         MPI_Allreduce(&zero,                           &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1444:         MPI_Allreduce(&zero,                           &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1445:         MPI_Allreduce(&zero,                           &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1446:         name = "";
1447:       }
1448:       mess *= 0.5; messLen *= 0.5; red /= size;
1449:       if (TotalTime     != 0.0) fracTime       = stageTime/TotalTime;    else fracTime       = 0.0;
1450:       if (TotalFlops    != 0.0) fracFlops      = flops/TotalFlops;       else fracFlops      = 0.0;
1451:       /* Talk to Barry if (stageTime     != 0.0) flops          = (size*flops)/stageTime; else flops          = 0.0; */
1452:       if (numMessages   != 0.0) fracMessages   = mess/numMessages;       else fracMessages   = 0.0;
1453:       if (numMessages   != 0.0) avgMessLen     = messLen/numMessages;    else avgMessLen     = 0.0;
1454:       if (messageLength != 0.0) fracLength     = messLen/messageLength;  else fracLength     = 0.0;
1455:       if (numReductions != 0.0) fracReductions = red/numReductions;      else fracReductions = 0.0;
1456:       PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%%  %6.4e %5.1f%%  %5.3e %5.1f%%  %5.3e      %5.1f%%  %5.3e %5.1f%% \n",
1457:                           stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
1458:                           mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);
1459:     }
1460:   }

1462:   PetscFPrintf(comm, fd,
1463:     "\n------------------------------------------------------------------------------------------------------------------------\n");
1464: 
1465:   PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n");
1466:   PetscFPrintf(comm, fd, "Phase summary info:\n");
1467:   PetscFPrintf(comm, fd, "   Count: number of times phase was executed\n");
1468:   PetscFPrintf(comm, fd, "   Time and Flops: Max - maximum over all processors\n");
1469:   PetscFPrintf(comm, fd, "                   Ratio - ratio of maximum to minimum over all processors\n");
1470:   PetscFPrintf(comm, fd, "   Mess: number of messages sent\n");
1471:   PetscFPrintf(comm, fd, "   Avg. len: average message length\n");
1472:   PetscFPrintf(comm, fd, "   Reduct: number of global reductions\n");
1473:   PetscFPrintf(comm, fd, "   Global: entire computation\n");
1474:   PetscFPrintf(comm, fd, "   Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n");
1475:   PetscFPrintf(comm, fd, "      %%T - percent time in this phase         %%F - percent flops in this phase\n");
1476:   PetscFPrintf(comm, fd, "      %%M - percent messages in this phase     %%L - percent message lengths in this phase\n");
1477:   PetscFPrintf(comm, fd, "      %%R - percent reductions in this phase\n");
1478:   PetscFPrintf(comm, fd, "   Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)\n");
1479:   PetscFPrintf(comm, fd,
1480:     "------------------------------------------------------------------------------------------------------------------------\n");
1481: 

1483: #if defined(PETSC_USE_DEBUG)
1484:   PetscFPrintf(comm, fd, "\n\n");
1485:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1486:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1487:   PetscFPrintf(comm, fd, "      #                          WARNING!!!                    #\n");
1488:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1489:   PetscFPrintf(comm, fd, "      #   This code was compiled with a debugging option,      #\n");
1490:   PetscFPrintf(comm, fd, "      #   To get timing results run ./configure                #\n");
1491:   PetscFPrintf(comm, fd, "      #   using --with-debugging=no, the performance will      #\n");
1492:   PetscFPrintf(comm, fd, "      #   be generally two or three times faster.              #\n");
1493:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1494:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1495: #endif
1496: #if defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_FORTRAN_KERNELS)
1497:   PetscFPrintf(comm, fd, "\n\n");
1498:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1499:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1500:   PetscFPrintf(comm, fd, "      #                          WARNING!!!                    #\n");
1501:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1502:   PetscFPrintf(comm, fd, "      #   The code for various complex numbers numerical       #\n");
1503:   PetscFPrintf(comm, fd, "      #   kernels uses C++, which generally is not well        #\n");
1504:   PetscFPrintf(comm, fd, "      #   optimized.  For performance that is about 4-5 times  #\n");
1505:   PetscFPrintf(comm, fd, "      #   faster, specify --with-fortran-kernels=1             #\n");
1506:   PetscFPrintf(comm, fd, "      #   when running ./configure.py.                         #\n");
1507:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1508:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1509: #endif

1511:   /* Report events */
1512:   PetscFPrintf(comm, fd,
1513:     "Event                Count      Time (sec)     Flops                             --- Global ---  --- Stage ---   Total\n");
1514: 
1515:   PetscFPrintf(comm, fd,
1516:     "                   Max Ratio  Max     Ratio   Max  Ratio  Mess   Avg len Reduct  %%T %%F %%M %%L %%R  %%T %%F %%M %%L %%R Mflop/s\n");
1517: 
1518:   PetscFPrintf(comm,fd,
1519:     "------------------------------------------------------------------------------------------------------------------------\n");

1521: 
1522:   /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1523:   for(stage = 0; stage < numStages; stage++) {
1524:     if (!stageVisible[stage]) continue;
1525:     if (localStageUsed[stage]) {
1526:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1527:       MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1528:       MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1529:       MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1530:       MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1531:       MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1532:     } else {
1533:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1534:       MPI_Allreduce(&zero,                           &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1535:       MPI_Allreduce(&zero,                           &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1536:       MPI_Allreduce(&zero,                           &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1537:       MPI_Allreduce(&zero,                           &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1538:       MPI_Allreduce(&zero,                           &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1539:     }
1540:     mess *= 0.5; messLen *= 0.5; red /= size;

1542:     /* Get total number of events in this stage --
1543:        Currently, a single processor can register more events than another, but events must all be registered in order,
1544:        just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
1545:        on the event ID. This seems best accomplished by assoicating a communicator with each stage.

1547:        Problem: If the event did not happen on proc 1, its name will not be available.
1548:        Problem: Event visibility is not implemented
1549:     */
1550:     if (localStageUsed[stage]) {
1551:       eventInfo      = stageLog->stageInfo[stage].eventLog->eventInfo;
1552:       localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1553:     } else {
1554:       localNumEvents = 0;
1555:     }
1556:     MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1557:     for(event = 0; event < numEvents; event++) {
1558:       if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1559:         if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) {
1560:           flopr = eventInfo[event].flops;
1561:         } else {
1562:           flopr = 0.0;
1563:         }
1564:         MPI_Allreduce(&flopr,                          &minf,  1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1565:         MPI_Allreduce(&flopr,                          &maxf,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1566:         MPI_Allreduce(&eventInfo[event].flops,         &totf,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1567:         MPI_Allreduce(&eventInfo[event].time,          &mint,  1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1568:         MPI_Allreduce(&eventInfo[event].time,          &maxt,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1569:         MPI_Allreduce(&eventInfo[event].time,          &tott,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1570:         MPI_Allreduce(&eventInfo[event].numMessages,   &totm,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1571:         MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1572:         MPI_Allreduce(&eventInfo[event].numReductions, &totr,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1573:         MPI_Allreduce(&eventInfo[event].count,         &minCt, 1, MPI_INT,             MPI_MIN, comm);
1574:         MPI_Allreduce(&eventInfo[event].count,         &maxCt, 1, MPI_INT,             MPI_MAX, comm);
1575:         name = stageLog->eventLog->eventInfo[event].name;
1576:       } else {
1577:         flopr = 0.0;
1578:         MPI_Allreduce(&flopr,                          &minf,  1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1579:         MPI_Allreduce(&flopr,                          &maxf,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1580:         MPI_Allreduce(&zero,                           &totf,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1581:         MPI_Allreduce(&zero,                           &mint,  1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1582:         MPI_Allreduce(&zero,                           &maxt,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1583:         MPI_Allreduce(&zero,                           &tott,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1584:         MPI_Allreduce(&zero,                           &totm,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1585:         MPI_Allreduce(&zero,                           &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1586:         MPI_Allreduce(&zero,                           &totr,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1587:         MPI_Allreduce(&ierr,                           &minCt, 1, MPI_INT,             MPI_MIN, comm);
1588:         MPI_Allreduce(&ierr,                           &maxCt, 1, MPI_INT,             MPI_MAX, comm);
1589:         name = "";
1590:       }
1591:       if (mint < 0.0) {
1592:         PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n",mint,name);
1593:         mint = 0;
1594:       }
1595:       if (minf < 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Minimum flops %g over all processors for %s is negative! Not possible!",minf,name);
1596:       totm *= 0.5; totml *= 0.5; totr /= size;
1597: 
1598:       if (maxCt != 0) {
1599:         if (minCt         != 0)   ratCt            = ((PetscLogDouble) maxCt)/minCt; else ratCt            = 0.0;
1600:         if (mint          != 0.0) ratt             = maxt/mint;                  else ratt             = 0.0;
1601:         if (minf          != 0.0) ratf             = maxf/minf;                  else ratf             = 0.0;
1602:         if (TotalTime     != 0.0) fracTime         = tott/TotalTime;             else fracTime         = 0.0;
1603:         if (TotalFlops    != 0.0) fracFlops        = totf/TotalFlops;            else fracFlops        = 0.0;
1604:         if (stageTime     != 0.0) fracStageTime    = tott/stageTime;             else fracStageTime    = 0.0;
1605:         if (flops         != 0.0) fracStageFlops   = totf/flops;                 else fracStageFlops   = 0.0;
1606:         if (numMessages   != 0.0) fracMess         = totm/numMessages;           else fracMess         = 0.0;
1607:         if (messageLength != 0.0) fracMessLen      = totml/messageLength;        else fracMessLen      = 0.0;
1608:         if (numReductions != 0.0) fracRed          = totr/numReductions;         else fracRed          = 0.0;
1609:         if (mess          != 0.0) fracStageMess    = totm/mess;                  else fracStageMess    = 0.0;
1610:         if (messLen       != 0.0) fracStageMessLen = totml/messLen;              else fracStageMessLen = 0.0;
1611:         if (red           != 0.0) fracStageRed     = totr/red;                   else fracStageRed     = 0.0;
1612:         if (totm          != 0.0) totml           /= totm;                       else totml            = 0.0;
1613:         if (maxt          != 0.0) flopr            = totf/maxt;                  else flopr            = 0.0;
1614:         PetscFPrintf(comm, fd,
1615:           "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f\n",
1616:                             name, maxCt, ratCt, maxt, ratt, maxf, ratf, totm, totml, totr,
1617:                             100.0*fracTime, 100.0*fracFlops, 100.0*fracMess, 100.0*fracMessLen, 100.0*fracRed,
1618:                             100.0*fracStageTime, 100.0*fracStageFlops, 100.0*fracStageMess, 100.0*fracStageMessLen, 100.0*fracStageRed,
1619:                             flopr/1.0e6);
1620:       }
1621:     }
1622:   }

1624:   /* Memory usage and object creation */
1625:   PetscFPrintf(comm, fd,
1626:     "------------------------------------------------------------------------------------------------------------------------\n");
1627:   PetscFPrintf(comm, fd, "\n");
1628:   PetscFPrintf(comm, fd, "Memory usage is given in bytes:\n\n");

1630:   /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1631:      the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1632:      stats for stages local to processor sets.
1633:   */
1634:   /* We should figure out the longest object name here (now 20 characters) */
1635:   PetscFPrintf(comm, fd, "Object Type          Creations   Destructions     Memory  Descendants' Mem.\n");
1636:   PetscFPrintf(comm, fd, "Reports information only for process 0.\n");
1637:   for(stage = 0; stage < numStages; stage++) {
1638:     if (localStageUsed[stage]) {
1639:       classInfo = stageLog->stageInfo[stage].classLog->classInfo;
1640:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1641:       for(oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
1642:         if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
1643:           PetscFPrintf(comm, fd, "%20s %5d          %5d  %11.0f     %g\n", stageLog->classLog->classInfo[oclass].name,
1644:                               classInfo[oclass].creations, classInfo[oclass].destructions, classInfo[oclass].mem,
1645:                               classInfo[oclass].descMem);
1646:         }
1647:       }
1648:     } else {
1649:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1650:     }
1651:   }

1653:   PetscFree(localStageUsed);
1654:   PetscFree(stageUsed);
1655:   PetscFree(localStageVisible);
1656:   PetscFree(stageVisible);

1658:   /* Information unrelated to this particular run */
1659:   PetscFPrintf(comm, fd,
1660:     "========================================================================================================================\n");
1661:   PetscTime(y);
1662:   PetscTime(x);
1663:   PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
1664:   PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
1665:   PetscFPrintf(comm,fd,"Average time to get PetscTime(): %g\n", (y-x)/10.0);
1666:   /* MPI information */
1667:   if (size > 1) {
1668:     MPI_Status  status;
1669:     PetscMPIInt tag;
1670:     MPI_Comm    newcomm;

1672:     MPI_Barrier(comm);
1673:     PetscTime(x);
1674:     MPI_Barrier(comm);
1675:     MPI_Barrier(comm);
1676:     MPI_Barrier(comm);
1677:     MPI_Barrier(comm);
1678:     MPI_Barrier(comm);
1679:     PetscTime(y);
1680:     PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y-x)/5.0);
1681:     PetscCommDuplicate(comm,&newcomm, &tag);
1682:     MPI_Barrier(comm);
1683:     if (rank) {
1684:       MPI_Recv(0, 0, MPI_INT, rank-1,            tag, newcomm, &status);
1685:       MPI_Send(0, 0, MPI_INT, (rank+1)%size, tag, newcomm);
1686:     } else {
1687:       PetscTime(x);
1688:       MPI_Send(0, 0, MPI_INT, 1,          tag, newcomm);
1689:       MPI_Recv(0, 0, MPI_INT, size-1, tag, newcomm, &status);
1690:       PetscTime(y);
1691:       PetscFPrintf(comm,fd,"Average time for zero size MPI_Send(): %g\n", (y-x)/size);
1692:     }
1693:     PetscCommDestroy(&newcomm);
1694:   }
1695:   PetscOptionsView(viewer);

1697:   /* Machine and compile information */
1698: #if defined(PETSC_USE_FORTRAN_KERNELS)
1699:   PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n");
1700: #else
1701:   PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n");
1702: #endif
1703: #if defined(PETSC_USE_REAL_SINGLE)
1704:   PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n");
1705: #elif defined(PETSC_USE_LONGDOUBLE)
1706:   PetscFPrintf(comm, fd, "Compiled with long double precision PetscScalar and PetscReal\n");
1707: #endif

1709: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1710:   PetscFPrintf(comm, fd, "Compiled with single precision matrices\n");
1711: #else
1712:   PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n");
1713: #endif
1714:   PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d\n",
1715:                       (int) sizeof(short), (int) sizeof(int), (int) sizeof(long), (int) sizeof(void*),(int) sizeof(PetscScalar));

1717:   PetscFPrintf(comm, fd, "Configure run at: %s\n",petscconfigureruntime);
1718:   PetscFPrintf(comm, fd, "Configure options: %s",petscconfigureoptions);
1719:   PetscFPrintf(comm, fd, "%s", petscmachineinfo);
1720:   PetscFPrintf(comm, fd, "%s", petsccompilerinfo);
1721:   PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo);
1722:   PetscFPrintf(comm, fd, "%s", petsclinkerinfo);

1724:   /* Cleanup */
1725:   PetscFPrintf(comm, fd, "\n");
1726:   PetscStageLogPush(stageLog, lastStage);
1727:   return(0);
1728: }

1732: /*@C
1733:   PetscLogPrintDetailed - Each process prints the times for its own events

1735:   Collective over MPI_Comm

1737:   Input Parameter:
1738: + comm - The MPI communicator (only one processor prints output)
1739: - file - [Optional] The output file name

1741:   Options Database Keys:
1742: . -log_summary_detailed - Prints summary of log information (for code compiled with PETSC_USE_LOG)

1744:   Usage:
1745: .vb
1746:      PetscInitialize(...);
1747:      PetscLogBegin();
1748:      ... code ...
1749:      PetscLogPrintDetailed(MPI_Comm,filename);
1750:      PetscFinalize(...);
1751: .ve

1753:   Notes:
1754:   By default the summary is printed to stdout.

1756:   Level: beginner
1757:    
1758: .keywords: log, dump, print
1759: .seealso: PetscLogBegin(), PetscLogDump(), PetscLogView()
1760: @*/
1761: PetscErrorCode  PetscLogPrintDetailed(MPI_Comm comm, const char filename[])
1762: {
1763:   FILE               *fd                                       = PETSC_STDOUT;
1764:   PetscStageLog      stageLog;
1765:   PetscStageInfo     *stageInfo                                = PETSC_NULL;
1766:   PetscEventPerfInfo *eventInfo                                = PETSC_NULL;
1767:   const char         *name                                     = PETSC_NULL;
1768:   PetscLogDouble     TotalTime;
1769:   PetscLogDouble     stageTime, flops, flopr, mess, messLen, red;
1770:   PetscLogDouble     maxf, totf, maxt, tott, totm, totml, totr = 0.0;
1771:   PetscMPIInt        maxCt;
1772:   PetscBool          *stageUsed;
1773:   PetscBool          *stageVisible;
1774:   int                numStages, numEvents;
1775:   int                stage;
1776:   PetscLogEvent      event;
1777:   PetscErrorCode     ierr;

1780:   /* Pop off any stages the user forgot to remove */
1781:   PetscLogGetStageLog(&stageLog);
1782:   PetscStageLogGetCurrent(stageLog, &stage);
1783:   while (stage >= 0) {
1784:     PetscStageLogPop(stageLog);
1785:     PetscStageLogGetCurrent(stageLog, &stage);
1786:   }
1787:   /* Get the total elapsed time */
1788:   PetscTime(TotalTime);  TotalTime -= BaseTime;
1789:   /* Open the summary file */
1790:   if (filename) {
1791:     PetscFOpen(comm, filename, "w", &fd);
1792:   }

1794:   PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1795:   PetscFPrintf(comm, fd, "***             WIDEN YOUR WINDOW TO 120 CHARACTERS.  Use 'enscript -r -fCourier9' to print this document            ***\n");
1796:   PetscFPrintf(comm, fd, "************************************************************************************************************************\n");


1799:   numStages = stageLog->numStages;
1800:   PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
1801:   PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
1802:   if (numStages > 0) {
1803:     stageInfo = stageLog->stageInfo;
1804:     for(stage = 0; stage < numStages; stage++) {
1805:       if (stage < stageLog->numStages) {
1806:         stageUsed[stage]    = stageInfo[stage].used;
1807:         stageVisible[stage] = stageInfo[stage].perfInfo.visible;
1808:       } else {
1809:         stageUsed[stage]    = PETSC_FALSE;
1810:         stageVisible[stage] = PETSC_TRUE;
1811:       }
1812:     }
1813:   }

1815:   /* Report events */
1816:   PetscFPrintf(comm, fd,"Event                Count      Time (sec)     Flops/sec                          \n");
1817:   PetscFPrintf(comm, fd,"                                                            Mess   Avg len Reduct \n");
1818:   PetscFPrintf(comm,fd,"-----------------------------------------------------------------------------------\n");
1819:   /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1820:   for(stage = 0; stage < numStages; stage++) {
1821:     if (!stageVisible[stage]) continue;
1822:     if (stageUsed[stage]) {
1823:       PetscSynchronizedFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1824:       MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1825:       MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1826:       MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1827:       MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1828:       MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1829:     }
1830:     mess *= 0.5; messLen *= 0.5;

1832:     /* Get total number of events in this stage --
1833:     */
1834:     if (stageUsed[stage]) {
1835:       eventInfo      = stageLog->stageInfo[stage].eventLog->eventInfo;
1836:       numEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1837:     } else {
1838:       numEvents = 0;
1839:     }
1840:     for(event = 0; event < numEvents; event++) {
1841:       if (stageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents)) {
1842:         if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) {
1843:           flopr = eventInfo[event].flops/eventInfo[event].time;
1844:         } else {
1845:           flopr = 0.0;
1846:         }
1847:         MPI_Allreduce(&flopr,                          &maxf,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PETSC_COMM_SELF);
1848:         MPI_Allreduce(&eventInfo[event].flops,         &totf,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1849:         MPI_Allreduce(&eventInfo[event].time,          &maxt,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PETSC_COMM_SELF);
1850:         MPI_Allreduce(&eventInfo[event].time,          &tott,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1851:         MPI_Allreduce(&eventInfo[event].numMessages,   &totm,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1852:         MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1853:         totr = eventInfo[event].numReductions;
1854:         MPI_Allreduce(&eventInfo[event].count,         &maxCt, 1, MPI_INT,             MPI_MAX, PETSC_COMM_SELF);
1855:         name = stageLog->eventLog->eventInfo[event].name;
1856:         totm *= 0.5; totml *= 0.5;
1857:       }
1858: 
1859:       if (maxCt != 0) {
1860:         if (totm          != 0.0) totml           /= totm;                       else totml            = 0.0;
1861:         PetscSynchronizedFPrintf(comm, fd,"%-16s %7d      %5.4e      %3.2e      %2.1e %2.1e %2.1e\n",name, maxCt,  maxt,  maxf, totm, totml, totr);
1862:       }
1863:     }
1864:   }
1865:   PetscSynchronizedFlush(comm);

1867:   PetscFree(stageUsed);
1868:   PetscFree(stageVisible);

1870:   PetscFClose(comm, fd);
1871:   return(0);
1872: }

1874: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
1877: /*@C
1878:    PetscGetFlops - Returns the number of flops used on this processor 
1879:    since the program began. 

1881:    Not Collective

1883:    Output Parameter:
1884:    flops - number of floating point operations 

1886:    Notes:
1887:    A global counter logs all PETSc flop counts.  The user can use
1888:    PetscLogFlops() to increment this counter to include flops for the 
1889:    application code.  

1891:    PETSc automatically logs library events if the code has been
1892:    compiled with -DPETSC_USE_LOG (which is the default), and -log,
1893:    -log_summary, or -log_all are specified.  PetscLogFlops() is
1894:    intended for logging user flops to supplement this PETSc
1895:    information.

1897:    Level: intermediate

1899: .keywords: log, flops, floating point operations

1901: .seealso: PetscGetTime(), PetscLogFlops()
1902: @*/
1903: PetscErrorCode  PetscGetFlops(PetscLogDouble *flops)
1904: {
1906:   *flops = _TotalFlops;
1907:   return(0);
1908: }

1912: PetscErrorCode  PetscLogObjectState(PetscObject obj, const char format[], ...)
1913: {
1915:   size_t         fullLength;
1916:   va_list        Argp;

1919:   if (!logObjects) return(0);
1920:   va_start(Argp, format);
1921:   PetscVSNPrintf(objects[obj->id].info, 64,format,&fullLength, Argp);
1922:   va_end(Argp);
1923:   return(0);
1924: }


1927: /*MC
1928:    PetscLogFlops - Adds floating point operations to the global counter.

1930:    Synopsis:
1931:    PetscErrorCode PetscLogFlops(PetscLogDouble f)

1933:    Not Collective

1935:    Input Parameter:
1936: .  f - flop counter


1939:    Usage:
1940: .vb
1941:      PetscLogEvent USER_EVENT;
1942:      PetscLogEventRegister("User event",0,&USER_EVENT);
1943:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
1944:         [code segment to monitor]
1945:         PetscLogFlops(user_flops)
1946:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1947: .ve

1949:    Notes:
1950:    A global counter logs all PETSc flop counts.  The user can use
1951:    PetscLogFlops() to increment this counter to include flops for the 
1952:    application code.  

1954:    PETSc automatically logs library events if the code has been
1955:    compiled with -DPETSC_USE_LOG (which is the default), and -log,
1956:    -log_summary, or -log_all are specified.  PetscLogFlops() is
1957:    intended for logging user flops to supplement this PETSc
1958:    information.

1960:    Level: intermediate

1962: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscGetFlops()

1964: .keywords: log, flops, floating point operations
1965: M*/

1967: /*MC
1968:    PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice)
1969:     to get accurate timings

1971:    Synopsis:
1972:    void PetscPreLoadBegin(PetscBool  flag,char *name);

1974:    Not Collective

1976:    Input Parameter:
1977: +   flag - PETSC_TRUE to run twice, PETSC_FALSE to run once, may be overridden
1978:            with command line option -preload true or -preload false
1979: -   name - name of first stage (lines of code timed separately with -log_summary) to
1980:            be preloaded

1982:    Usage:
1983: .vb
1984:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
1985:        lines of code
1986:        PetscPreLoadStage("second stage");
1987:        lines of code
1988:      PetscPreLoadEnd();
1989: .ve

1991:    Notes: Only works in C/C++, not Fortran

1993:      Flags available within the macro. 
1994: +    PetscPreLoadingUsed - true if we are or have done preloading 
1995: .    PetscPreLoadingOn - true if it is CURRENTLY doing preload
1996: .    PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second
1997: -    PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on
1998:      The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin()
1999:      and PetscPreLoadEnd()

2001:    Level: intermediate

2003: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadEnd(), PetscPreLoadStage()

2005:    Concepts: preloading
2006:    Concepts: timing^accurate
2007:    Concepts: paging^eliminating effects of


2010: M*/

2012: /*MC
2013:    PetscPreLoadEnd - End a segment of code that may be preloaded (run twice)
2014:     to get accurate timings

2016:    Synopsis:
2017:    void PetscPreLoadEnd(void);

2019:    Not Collective

2021:    Usage:
2022: .vb
2023:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2024:        lines of code
2025:        PetscPreLoadStage("second stage");
2026:        lines of code
2027:      PetscPreLoadEnd();
2028: .ve

2030:    Notes: only works in C/C++ not fortran

2032:    Level: intermediate

2034: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadStage()

2036: M*/

2038: /*MC
2039:    PetscPreLoadStage - Start a new segment of code to be timed separately.
2040:     to get accurate timings

2042:    Synopsis:
2043:    void PetscPreLoadStage(char *name);

2045:    Not Collective

2047:    Usage:
2048: .vb
2049:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2050:        lines of code
2051:        PetscPreLoadStage("second stage");
2052:        lines of code
2053:      PetscPreLoadEnd();
2054: .ve

2056:    Notes: only works in C/C++ not fortran

2058:    Level: intermediate

2060: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd()

2062: M*/

2064: #include <../src/sys/viewer/impls/ascii/asciiimpl.h>  /*I     "petscsys.h"   I*/
2067: /*@ 
2068:    PetscLogViewPython - Saves logging information in a Python format.

2070:    Collective on PetscViewer

2072:    Input Paramter:
2073: .   viewer - viewer to save Python data

2075:   Level: intermediate 

2077: @*/
2078: PetscErrorCode  PetscLogViewPython(PetscViewer viewer)
2079: {
2080:   PetscViewer_ASCII  *ascii                  = (PetscViewer_ASCII*)viewer->data;
2081:   FILE               *fd                     = ascii->fd;
2082:   PetscLogDouble     zero                    = 0.0;
2083:   PetscStageLog      stageLog;
2084:   PetscStageInfo     *stageInfo              = PETSC_NULL;
2085:   PetscEventPerfInfo *eventInfo              = PETSC_NULL;
2086:   const char         *name;
2087:   char               stageName[2048];
2088:   PetscLogDouble     locTotalTime, TotalTime = 0, TotalFlops = 0;
2089:   PetscLogDouble     numMessages             = 0, messageLength = 0, avgMessLen, numReductions = 0;
2090:   PetscLogDouble     stageTime, flops, mem, mess, messLen, red;
2091:   PetscLogDouble     fracTime, fracFlops, fracMessages, fracLength;
2092:   PetscLogDouble     fracReductions;
2093:   PetscLogDouble     tot,avg,x,y,*mydata;
2094:   PetscMPIInt        maxCt;
2095:   PetscMPIInt        size, rank, *mycount;
2096:   PetscBool          *localStageUsed,    *stageUsed;
2097:   PetscBool          *localStageVisible, *stageVisible;
2098:   int                numStages, localNumEvents, numEvents;
2099:   int                stage, lastStage;
2100:   PetscLogEvent      event;
2101:   PetscErrorCode     ierr;
2102:   PetscInt           i;
2103:   MPI_Comm           comm;

2106:   PetscObjectGetComm((PetscObject)viewer,&comm);
2107:   MPI_Comm_size(comm, &size);
2108:   MPI_Comm_rank(comm, &rank);
2109:   PetscMalloc(size*sizeof(PetscLogDouble), &mydata);
2110:   PetscMalloc(size*sizeof(PetscMPIInt), &mycount);

2112:   /* Pop off any stages the user forgot to remove */
2113:   lastStage = 0;
2114:   PetscLogGetStageLog(&stageLog);
2115:   PetscStageLogGetCurrent(stageLog, &stage);
2116:   while (stage >= 0) {
2117:     lastStage = stage;
2118:     PetscStageLogPop(stageLog);
2119:     PetscStageLogGetCurrent(stageLog, &stage);
2120:   }
2121:   /* Get the total elapsed time */
2122:   PetscTime(locTotalTime);  locTotalTime -= BaseTime;

2124:   PetscFPrintf(comm, fd, "\n#------ PETSc Performance Summary ----------\n\n");
2125:   PetscFPrintf(comm, fd, "Nproc = %d\n",size);

2127:   /* Must preserve reduction count before we go on */
2128:   red  = (petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct)/((PetscLogDouble) size);

2130:   /* Calculate summary information */

2132:   /*   Time */
2133:   MPI_Gather(&locTotalTime,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2134:   if (!rank){
2135:     PetscFPrintf(comm, fd, "Time = [ " );
2136:     tot  = 0.0;
2137:     for (i=0; i<size; i++){
2138:       tot += mydata[i];
2139:       PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2140:     }
2141:     PetscFPrintf(comm, fd, "]\n" );
2142:     avg  = (tot)/((PetscLogDouble) size);
2143:     TotalTime = tot;
2144:   }

2146:   /*   Objects */
2147:   avg  = (PetscLogDouble) numObjects;
2148:   MPI_Gather(&avg,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2149:   if (!rank){
2150:     PetscFPrintf(comm, fd, "Objects = [ " );
2151:     for (i=0; i<size; i++){
2152:       PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2153:     }
2154:     PetscFPrintf(comm, fd, "]\n" );
2155:   }

2157:   /*   Flops */
2158:   MPI_Gather(&_TotalFlops,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2159:   if (!rank){
2160:     PetscFPrintf(comm, fd, "Flops = [ " );
2161:     tot  = 0.0;
2162:     for (i=0; i<size; i++){
2163:       tot += mydata[i];
2164:       PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2165:     }
2166:     PetscFPrintf(comm, fd, "]\n");
2167:     TotalFlops = tot;
2168:   }

2170:   /*   Memory */
2171:   PetscMallocGetMaximumUsage(&mem);
2172:   MPI_Gather(&mem,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2173:   if (!rank){
2174:     PetscFPrintf(comm, fd, "Memory = [ " );
2175:     for (i=0; i<size; i++){
2176:       PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2177:     }
2178:     PetscFPrintf(comm, fd, "]\n" );
2179:   }

2181:   /*   Messages */
2182:   mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
2183:   MPI_Gather(&mess,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2184:   if (!rank){
2185:     PetscFPrintf(comm, fd, "MPIMessages = [ " );
2186:     tot  = 0.0;
2187:     for (i=0; i<size; i++){
2188:       tot += mydata[i];
2189:       PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2190:     }
2191:     PetscFPrintf(comm, fd, "]\n" );
2192:     numMessages = tot;
2193:   }

2195:   /*   Message Lengths */
2196:   mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
2197:   MPI_Gather(&mess,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2198:   if (!rank){
2199:     PetscFPrintf(comm, fd, "MPIMessageLengths = [ " );
2200:     tot  = 0.0;
2201:     for (i=0; i<size; i++){
2202:       tot += mydata[i];
2203:       PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2204:     }
2205:     PetscFPrintf(comm, fd, "]\n" );
2206:     messageLength = tot;
2207:   }

2209:   /*   Reductions */
2210:   MPI_Gather(&red,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2211:   if (!rank){
2212:     PetscFPrintf(comm, fd, "MPIReductions = [ " );
2213:     tot  = 0.0;
2214:     for (i=0; i<size; i++){
2215:       tot += mydata[i];
2216:       PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2217:     }
2218:     PetscFPrintf(comm, fd, "]\n" );
2219:     numReductions = tot;
2220:   }

2222:   /* Get total number of stages --
2223:        Currently, a single processor can register more stages than another, but stages must all be registered in order.
2224:        We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
2225:        This seems best accomplished by assoicating a communicator with each stage.
2226:   */
2227:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
2228:   PetscMalloc(numStages * sizeof(PetscBool), &localStageUsed);
2229:   PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
2230:   PetscMalloc(numStages * sizeof(PetscBool), &localStageVisible);
2231:   PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
2232:   if (numStages > 0) {
2233:     stageInfo = stageLog->stageInfo;
2234:     for(stage = 0; stage < numStages; stage++) {
2235:       if (stage < stageLog->numStages) {
2236:         localStageUsed[stage]    = stageInfo[stage].used;
2237:         localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
2238:       } else {
2239:         localStageUsed[stage]    = PETSC_FALSE;
2240:         localStageVisible[stage] = PETSC_TRUE;
2241:       }
2242:     }
2243:     MPI_Allreduce(localStageUsed,    stageUsed,    numStages, MPI_INT, MPI_LOR,  comm);
2244:     MPI_Allreduce(localStageVisible, stageVisible, numStages, MPI_INT, MPI_LAND, comm);
2245: for(stage = 0; stage < numStages; stage++) {
2246:       if (stageUsed[stage]) {
2247:         PetscFPrintf(comm, fd, "\n#Summary of Stages:   ----- Time ------  ----- Flops -----  --- Messages ---  -- Message Lengths --  -- Reductions --\n");
2248:         PetscFPrintf(comm, fd, "#                       Avg     %%Total     Avg     %%Total   counts   %%Total     Avg         %%Total   counts   %%Total \n");
2249:         break;
2250:       }
2251:     }
2252:     for(stage = 0; stage < numStages; stage++) {
2253:       if (!stageUsed[stage]) continue;
2254:       if (localStageUsed[stage]) {
2255:         MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2256:         MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2257:         MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2258:         MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2259:         MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2260:         name = stageInfo[stage].name;
2261:       } else {
2262:         MPI_Allreduce(&zero,                           &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2263:         MPI_Allreduce(&zero,                           &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2264:         MPI_Allreduce(&zero,                           &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2265:         MPI_Allreduce(&zero,                           &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2266:         MPI_Allreduce(&zero,                           &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2267:         name = "";
2268:       }
2269:       mess *= 0.5; messLen *= 0.5; red /= size;
2270:       if (TotalTime     != 0.0) fracTime       = stageTime/TotalTime;    else fracTime       = 0.0;
2271:       if (TotalFlops    != 0.0) fracFlops      = flops/TotalFlops;       else fracFlops      = 0.0;
2272:       /* Talk to Barry if (stageTime     != 0.0) flops          = (size*flops)/stageTime; else flops          = 0.0; */
2273:       if (numMessages   != 0.0) fracMessages   = mess/numMessages;       else fracMessages   = 0.0;
2274:       if (numMessages   != 0.0) avgMessLen     = messLen/numMessages;    else avgMessLen     = 0.0;
2275:       if (messageLength != 0.0) fracLength     = messLen/messageLength;  else fracLength     = 0.0;
2276:       if (numReductions != 0.0) fracReductions = red/numReductions;      else fracReductions = 0.0;
2277:       PetscFPrintf(comm, fd, "# ");
2278:       PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%%  %6.4e %5.1f%%  %5.3e %5.1f%%  %5.3e      %5.1f%%  %5.3e %5.1f%% \n",
2279:                           stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
2280:                           mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);
2281:     }
2282:   }

2284:   /* Report events */
2285:   PetscFPrintf(comm,fd,"\n# Event\n");
2286:   PetscFPrintf(comm,fd,"# ------------------------------------------------------\n");
2287:   PetscFPrintf(comm,fd,"class Stage(object):\n");
2288:   PetscFPrintf(comm,fd,"    def __init__(self, name):\n");
2289:   PetscFPrintf(comm,fd,"        self.name  = name\n");
2290:   PetscFPrintf(comm,fd,"        self.event = {}\n");
2291:   PetscFPrintf(comm,fd, "class Dummy(object):\n");
2292:   PetscFPrintf(comm,fd, "    pass\n");
2293:   /* Problem: The stage name will not show up unless the stage executed on proc 1 */
2294:   for(stage = 0; stage < numStages; stage++) {
2295:     if (!stageVisible[stage]) continue;
2296:     if (localStageUsed[stage]) {
2297:       MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2298:       MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2299:       MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2300:       MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2301:       MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2302:     } else {
2303:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
2304:       MPI_Allreduce(&zero,                           &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2305:       MPI_Allreduce(&zero,                           &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2306:       MPI_Allreduce(&zero,                           &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2307:       MPI_Allreduce(&zero,                           &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2308:       MPI_Allreduce(&zero,                           &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2309:     }
2310:     mess *= 0.5; messLen *= 0.5; red /= size;

2312:     /* Get total number of events in this stage --
2313:        Currently, a single processor can register more events than another, but events must all be registered in order,
2314:        just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
2315:        on the event ID. This seems best accomplished by assoicating a communicator with each stage.

2317:        Problem: If the event did not happen on proc 1, its name will not be available.
2318:        Problem: Event visibility is not implemented
2319:     */

2321:     {
2322:       size_t len, c;

2324:       PetscStrcpy(stageName, stageInfo[stage].name);
2325:       PetscStrlen(stageName, &len);
2326:       for(c = 0; c < len; ++c) {
2327:         if (stageName[c] == ' ') stageName[c] = '_';
2328:       }
2329:     }
2330:     if (!rank){
2331:       PetscFPrintf(comm, fd, "%s = Stage('%s')\n", stageName, stageName);
2332:     }

2334:     if (localStageUsed[stage]) {
2335:       eventInfo      = stageLog->stageInfo[stage].eventLog->eventInfo;
2336:       localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
2337:     } else {
2338:       localNumEvents = 0;
2339:     }
2340:     MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
2341:     for(event = 0; event < numEvents; event++) {
2342:       if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
2343:         MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, comm);
2344:         name = stageLog->eventLog->eventInfo[event].name;
2345:       } else {
2346:         MPI_Allreduce(&ierr, &maxCt, 1, MPI_INT, MPI_MAX, comm);
2347:         name = "";
2348:       }

2350:       if (maxCt != 0) {
2351:         PetscFPrintf(comm, fd,"#\n");
2352:         if (!rank){
2353:           PetscFPrintf(comm, fd, "%s = Dummy()\n",name);
2354:           PetscFPrintf(comm, fd, "%s.event['%s'] = %s\n",stageName,name,name);
2355:         }
2356:         /* Count */
2357:         MPI_Gather(&eventInfo[event].count,1,MPI_INT,mycount,1,MPI_INT,0,comm);
2358:         PetscFPrintf(comm, fd, "%s.Count = [ ", name);
2359:         if (!rank){
2360:           for (i=0; i<size; i++){
2361:             PetscFPrintf(comm, fd, "  %7d,",mycount[i] );
2362:           }
2363:           PetscFPrintf(comm, fd, "]\n" );
2364:         }
2365:         /* Time */
2366:         MPI_Gather(&eventInfo[event].time,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2367:         if (!rank){
2368:           PetscFPrintf(comm, fd, "%s.Time  = [ ", name);
2369:           for (i=0; i<size; i++){
2370:             PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2371:           }
2372:           PetscFPrintf(comm, fd, "]\n" );
2373:         }
2374:         /* Flops */
2375:         MPI_Gather(&eventInfo[event].flops,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2376:         if (!rank){
2377:           PetscFPrintf(comm, fd, "%s.Flops = [ ", name);
2378:           for (i=0; i<size; i++){
2379:             PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2380:           }
2381:           PetscFPrintf(comm, fd, "]\n" );
2382:         }
2383:         /* Num Messages */
2384:         MPI_Gather(&eventInfo[event].numMessages,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2385:         PetscFPrintf(comm, fd, "%s.NumMessages = [ ", name);
2386:         if (!rank){
2387:           for (i=0; i<size; i++){
2388:             PetscFPrintf(comm, fd, "  %7.1e,",mydata[i] );
2389:           }
2390:           PetscFPrintf(comm, fd, "]\n" );
2391:         }
2392:         /* Message Length */
2393:         MPI_Gather(&eventInfo[event].messageLength,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2394:         if (!rank){
2395:           PetscFPrintf(comm, fd, "%s.MessageLength = [ ", name);
2396:           for (i=0; i<size; i++){
2397:             PetscFPrintf(comm, fd, "  %5.3e,",mydata[i] );
2398:           }
2399:           PetscFPrintf(comm, fd, "]\n" );
2400:         }
2401:         /* Num Reductions */
2402:         MPI_Gather(&eventInfo[event].numReductions,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2403:         PetscFPrintf(comm, fd, "%s.NumReductions = [ ", name);
2404:         if (!rank){
2405:           for (i=0; i<size; i++){
2406:             PetscFPrintf(comm, fd, "  %7.1e,",mydata[i] );
2407:           }
2408:           PetscFPrintf(comm, fd, "]\n" );
2409:         }
2410:       }
2411:     }
2412:   }

2414:   /* Right now, only stages on the first processor are reported here, meaning only objects associated with
2415:      the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
2416:      stats for stages local to processor sets.
2417:   */
2418:   for(stage = 0; stage < numStages; stage++) {
2419:     if (!localStageUsed[stage]) {
2420:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
2421:     }
2422:   }

2424:   PetscFree(localStageUsed);
2425:   PetscFree(stageUsed);
2426:   PetscFree(localStageVisible);
2427:   PetscFree(stageVisible);
2428:   PetscFree(mydata);
2429:   PetscFree(mycount);

2431:   /* Information unrelated to this particular run */
2432:   PetscFPrintf(comm, fd,
2433:     "# ========================================================================================================================\n");
2434:   PetscTime(y);
2435:   PetscTime(x);
2436:   PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
2437:   PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
2438:   PetscFPrintf(comm,fd,"AveragetimetogetPetscTime = %g\n", (y-x)/10.0);
2439:   /* MPI information */
2440:   if (size > 1) {
2441:     MPI_Status  status;
2442:     PetscMPIInt tag;
2443:     MPI_Comm    newcomm;

2445:     MPI_Barrier(comm);
2446:     PetscTime(x);
2447:     MPI_Barrier(comm);
2448:     MPI_Barrier(comm);
2449:     MPI_Barrier(comm);
2450:     MPI_Barrier(comm);
2451:     MPI_Barrier(comm);
2452:     PetscTime(y);
2453:     PetscFPrintf(comm, fd, "AveragetimeforMPI_Barrier = %g\n", (y-x)/5.0);
2454:     PetscCommDuplicate(comm,&newcomm, &tag);
2455:     MPI_Barrier(comm);
2456:     if (rank) {
2457:       MPI_Recv(0, 0, MPI_INT, rank-1,            tag, newcomm, &status);
2458:       MPI_Send(0, 0, MPI_INT, (rank+1)%size, tag, newcomm);
2459:     } else {
2460:       PetscTime(x);
2461:       MPI_Send(0, 0, MPI_INT, 1,          tag, newcomm);
2462:       MPI_Recv(0, 0, MPI_INT, size-1, tag, newcomm, &status);
2463:       PetscTime(y);
2464:       PetscFPrintf(comm,fd,"AveragetimforzerosizeMPI_Send = %g\n", (y-x)/size);
2465:     }
2466:     PetscCommDestroy(&newcomm);
2467:   }

2469:   /* Cleanup */
2470:   PetscFPrintf(comm, fd, "\n");
2471:   PetscStageLogPush(stageLog, lastStage);
2472:   return(0);
2473: }

2475: #else /* end of -DPETSC_USE_LOG section */

2479: PetscErrorCode  PetscLogObjectState(PetscObject obj, const char format[], ...)
2480: {
2482:   return(0);
2483: }

2485: #endif /* PETSC_USE_LOG*/


2488: PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
2489: PetscClassId PETSC_OBJECT_CLASSID  = 0;

2493: /*@C
2494:   PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 

2496:   Not Collective

2498:   Input Parameter:
2499: . name   - The class name
2500:             
2501:   Output Parameter:
2502: . oclass - The class id or classid

2504:   Level: developer

2506: .keywords: log, class, register

2508: @*/
2509: PetscErrorCode  PetscClassIdRegister(const char name[],PetscClassId *oclass )
2510: {
2511: #if defined(PETSC_USE_LOG)
2512:   PetscStageLog  stageLog;
2513:   PetscInt       stage;
2515: #endif

2518:   *oclass = ++PETSC_LARGEST_CLASSID;
2519: #if defined(PETSC_USE_LOG)
2520:   PetscLogGetStageLog(&stageLog);
2521:   PetscClassRegLogRegister(stageLog->classLog, name, *oclass);
2522:   for(stage = 0; stage < stageLog->numStages; stage++) {
2523:     ClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
2524:   }
2525: #endif
2526:   return(0);
2527: }