Actual source code: plog.c
2: /*
3: PETSc code to log object creation and destruction and PETSc events.
5: This provides the public API used by the rest of PETSc and by users.
7: These routines use a private API that is not used elsewhere in PETSc and is not
8: accessible to users. The private API is defined in logimpl.h and the utils directory.
10: */
11: #include <petscsys.h> /*I "petscsys.h" I*/
12: #include <petsctime.h>
13: #if defined(PETSC_HAVE_MPE)
14: #include <mpe.h>
15: #endif
16: #include <stdarg.h>
17: #include <sys/types.h>
18: #if defined(PETSC_HAVE_STDLIB_H)
19: #include <stdlib.h>
20: #endif
21: #if defined(PETSC_HAVE_MALLOC_H)
22: #include <malloc.h>
23: #endif
24: #include <private/logimpl.h>
26: PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT;
28: #if defined(PETSC_CLANGUAGE_CXX) && !defined(PETSC_USE_EXTERN_CXX)
29: std::map<std::string,PETSc::LogEvent> PETSc::Log::event_registry;
30: std::map<std::string,PETSc::LogStage> PETSc::Log::stage_registry;
31: #endif
33: #if defined(PETSC_USE_LOG)
34: #include <petscmachineinfo.h>
35: #include <petscconfiginfo.h>
37: /* used in the MPI_XXX() count macros in petsclog.h */
39: /* Action and object logging variables */
40: Action *actions = PETSC_NULL;
41: Object *objects = PETSC_NULL;
42: PetscBool logActions = PETSC_FALSE;
43: PetscBool logObjects = PETSC_FALSE;
44: int numActions = 0, maxActions = 100;
45: int numObjects = 0, maxObjects = 100;
46: int numObjectsDestroyed = 0;
48: /* Global counters */
49: PetscLogDouble BaseTime = 0.0;
50: PetscLogDouble _TotalFlops = 0.0; /* The number of flops */
51: PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */
52: PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */
53: PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */
54: PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */
55: PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */
56: PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */
57: PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */
58: PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */
59: PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */
60: PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */
61: PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */
62: PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */
63: PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */
64: PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */
65: PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */
66: PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */
68: /* Logging functions */
69: PetscErrorCode (*_PetscLogPHC)(PetscObject) = PETSC_NULL;
70: PetscErrorCode (*_PetscLogPHD)(PetscObject) = PETSC_NULL;
71: PetscErrorCode (*_PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = PETSC_NULL;
72: PetscErrorCode (*_PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = PETSC_NULL;
74: /* Tracing event logging variables */
75: FILE *tracefile = PETSC_NULL;
76: int tracelevel = 0;
77: const char *traceblanks = " ";
78: char tracespace[128] = " ";
79: PetscLogDouble tracetime = 0.0;
80: static PetscBool PetscLogBegin_PrivateCalled = PETSC_FALSE;
82: /*---------------------------------------------- General Functions --------------------------------------------------*/
85: /*@C
86: PetscLogDestroy - Destroys the object and event logging data and resets the global counters.
88: Not Collective
90: Notes:
91: This routine should not usually be used by programmers. Instead employ
92: PetscLogStagePush() and PetscLogStagePop().
94: Level: developer
96: .keywords: log, destroy
 97: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogStagePush(), PetscLogStagePop()
98: @*/
99: PetscErrorCode PetscLogDestroy(void)
100: {
101: PetscStageLog stageLog;
105: PetscFree(actions);
106: PetscFree(objects);
107: PetscLogSet(PETSC_NULL, PETSC_NULL);
109: /* Resetting phase */
110: PetscLogGetStageLog(&stageLog);
111: PetscStageLogDestroy(stageLog);
112: _TotalFlops = 0.0;
113: numActions = 0;
114: numObjects = 0;
115: numObjectsDestroyed = 0;
116: maxActions = 100;
117: maxObjects = 100;
118: actions = PETSC_NULL;
119: objects = PETSC_NULL;
120: logActions = PETSC_FALSE;
121: logObjects = PETSC_FALSE;
122: BaseTime = 0.0;
123: _TotalFlops = 0.0;
124: petsc_tmp_flops = 0.0;
125: petsc_send_ct = 0.0;
126: petsc_recv_ct = 0.0;
127: petsc_send_len = 0.0;
128: petsc_recv_len = 0.0;
129: petsc_isend_ct = 0.0;
130: petsc_irecv_ct = 0.0;
131: petsc_isend_len = 0.0;
132: petsc_irecv_len = 0.0;
133: petsc_wait_ct = 0.0;
134: petsc_wait_any_ct = 0.0;
135: petsc_wait_all_ct = 0.0;
136: petsc_sum_of_waits_ct = 0.0;
137: petsc_allreduce_ct = 0.0;
138: petsc_gather_ct = 0.0;
139: petsc_scatter_ct = 0.0;
140: PETSC_LARGEST_EVENT = PETSC_EVENT;
141: _PetscLogPHC = PETSC_NULL;
142: _PetscLogPHD = PETSC_NULL;
143: tracefile = PETSC_NULL;
144: tracelevel = 0;
145: traceblanks = " ";
146: tracespace[0] = ' '; tracespace[1] = 0;
147: tracetime = 0.0;
148: PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
149: PETSC_OBJECT_CLASSID = 0;
150: _stageLog = 0;
151: PetscLogBegin_PrivateCalled = PETSC_FALSE;
152: return(0);
153: }
157: /*@C
158: PetscLogSet - Sets the logging functions called at the beginning and ending of every event.
160: Not Collective
162: Input Parameters:
163: + b - The function called at beginning of event
164: - e - The function called at end of event
166: Level: developer
168: .seealso: PetscLogDump(), PetscLogBegin(), PetscLogAllBegin(), PetscLogTraceBegin()
169: @*/
170: PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject),
171: PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
172: {
174: _PetscLogPLB = b;
175: _PetscLogPLE = e;
176: return(0);
177: }
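/*
   A minimal sketch of installing custom event handlers with PetscLogSet().  The handler
   names MyLogBegin/MyLogEnd are illustrative, not part of PETSc; each handler receives the
   event id, an integer, and the (possibly NULL) objects passed to PetscLogEventBegin/End().

      static PetscErrorCode MyLogBegin(PetscLogEvent e,int t,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)
      {
        printf("-> event %d begins\n",(int)e);      // custom action at event start
        return(0);
      }
      static PetscErrorCode MyLogEnd(PetscLogEvent e,int t,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)
      {
        printf("<- event %d ends\n",(int)e);        // custom action at event end
        return(0);
      }

      PetscLogSet(MyLogBegin,MyLogEnd);             // install before any events are logged
*/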
179: #if defined(PETSC_HAVE_CHUD)
180: #include <CHUD/CHUD.h>
181: #endif
182: #if defined(PETSC_HAVE_PAPI)
183: #include <papi.h>
184: int PAPIEventSet = PAPI_NULL;
185: #endif
187: /*------------------------------------------- Initialization Functions ----------------------------------------------*/
190: PetscErrorCode PetscLogBegin_Private(void)
191: {
192: int stage;
193: PetscBool opt;
194: PetscErrorCode ierr;
197: if (PetscLogBegin_PrivateCalled) return(0);
198: PetscLogBegin_PrivateCalled = PETSC_TRUE;
200: PetscOptionsHasName(PETSC_NULL, "-log_exclude_actions", &opt);
201: if (opt) {
202: logActions = PETSC_FALSE;
203: }
204: PetscOptionsHasName(PETSC_NULL, "-log_exclude_objects", &opt);
205: if (opt) {
206: logObjects = PETSC_FALSE;
207: }
208: if (logActions) {
209: PetscMalloc(maxActions * sizeof(Action), &actions);
210: }
211: if (logObjects) {
212: PetscMalloc(maxObjects * sizeof(Object), &objects);
213: }
214: _PetscLogPHC = PetscLogObjCreateDefault;
215: _PetscLogPHD = PetscLogObjDestroyDefault;
216: /* Setup default logging structures */
217: PetscStageLogCreate(&_stageLog);
218: PetscStageLogRegister(_stageLog, "Main Stage", &stage);
219: #if defined(PETSC_HAVE_CHUD)
220: chudInitialize();
221: chudAcquireSamplingFacility(CHUD_BLOCKING);
222: chudSetSamplingDevice(chudCPU1Dev);
223: chudSetStartDelay(0,chudNanoSeconds);
224: chudClearPMCMode(chudCPU1Dev,chudUnused);
225: chudClearPMCs();
226: /* chudSetPMCMuxPosition(chudCPU1Dev,0,0); */
227: printf("%s\n",chudGetEventName(chudCPU1Dev,PMC_1,193));
228: printf("%s\n",chudGetEventDescription(chudCPU1Dev,PMC_1,193));
229: printf("%s\n",chudGetEventNotes(chudCPU1Dev,PMC_1,193));
230: chudSetPMCEvent(chudCPU1Dev,PMC_1,193);
231: chudSetPMCMode(chudCPU1Dev,PMC_1,chudCounter);
232: chudSetPrivilegeFilter(chudCPU1Dev,PMC_1,chudCountUserEvents);
233: chudSetPMCEventMask(chudCPU1Dev,PMC_1,0xFE);
234: if (!chudIsEventValid(chudCPU1Dev,PMC_1,193)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Event is not valid %d",193);
235: chudStartPMCs();
236: #endif
237: #if defined(PETSC_HAVE_PAPI)
 238:   ierr = PAPI_library_init(PAPI_VER_CURRENT);
239: if (ierr != PAPI_VER_CURRENT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Cannot initialize PAPI");
240: PAPI_query_event(PAPI_FP_INS);
241: PAPI_create_eventset(&PAPIEventSet);
242: PAPI_add_event(PAPIEventSet,PAPI_FP_INS);
243: PAPI_start(PAPIEventSet);
244: #endif
246: /* All processors sync here for more consistent logging */
247: MPI_Barrier(PETSC_COMM_WORLD);
248: PetscTime(BaseTime);
249: PetscLogStagePush(stage);
250: return(0);
251: }
255: /*@C
256: PetscLogBegin - Turns on logging of objects and events. This logs flop
257: rates and object creation and should not slow programs down too much.
258: This routine may be called more than once.
260: Logically Collective over PETSC_COMM_WORLD
262: Options Database Keys:
263: + -log_summary - Prints summary of flop and timing information to the
264: screen (for code compiled with PETSC_USE_LOG)
265: - -log - Prints detailed log information (for code compiled with PETSC_USE_LOG)
267: Usage:
268: .vb
269: PetscInitialize(...);
270: PetscLogBegin();
271: ... code ...
272: PetscLogView(viewer); or PetscLogDump();
273: PetscFinalize();
274: .ve
276: Notes:
277: PetscLogView(viewer) or PetscLogDump() actually cause the printing of
278: the logging information.
280: Level: advanced
282: .keywords: log, begin
283: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin()
284: @*/
285: PetscErrorCode PetscLogBegin(void)
286: {
290: PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault);
291: PetscLogBegin_Private();
292: return(0);
293: }
297: /*@C
298: PetscLogAllBegin - Turns on extensive logging of objects and events. Logs
299: all events. This creates large log files and slows the program down.
301: Logically Collective on PETSC_COMM_WORLD
303: Options Database Keys:
304: . -log_all - Prints extensive log information (for code compiled with PETSC_USE_LOG)
306: Usage:
307: .vb
308: PetscInitialize(...);
309: PetscLogAllBegin();
310: ... code ...
311: PetscLogDump(filename);
312: PetscFinalize();
313: .ve
315: Notes:
316: A related routine is PetscLogBegin (with the options key -log), which is
317: intended for production runs since it logs only flop rates and object
318: creation (and shouldn't significantly slow the programs).
320: Level: advanced
322: .keywords: log, all, begin
323: .seealso: PetscLogDump(), PetscLogBegin(), PetscLogTraceBegin()
324: @*/
325: PetscErrorCode PetscLogAllBegin(void)
326: {
330: PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete);
331: PetscLogBegin_Private();
332: return(0);
333: }
337: /*@
338: PetscLogTraceBegin - Activates trace logging. Every time a PETSc event
339: begins or ends, the event name is printed.
341: Logically Collective on PETSC_COMM_WORLD
343: Input Parameter:
344: . file - The file to print trace in (e.g. stdout)
346: Options Database Key:
347: . -log_trace [filename] - Activates PetscLogTraceBegin()
349: Notes:
350: PetscLogTraceBegin() prints the processor number, the execution time (sec),
351: then "Event begin:" or "Event end:" followed by the event name.
353: PetscLogTraceBegin() allows tracing of all PETSc calls, which is useful
354: to determine where a program is hanging without running in the
355: debugger. Can be used in conjunction with the -info option.
357: Level: intermediate
359: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogBegin()
360: @*/
361: PetscErrorCode PetscLogTraceBegin(FILE *file)
362: {
366: tracefile = file;
367: PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace);
368: PetscLogBegin_Private();
369: return(0);
370: }
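/*
   A minimal sketch of tracing to stdout (the same effect as the -log_trace option with no
   filename); a per-process file opened with fopen() could be passed instead.

      PetscInitialize(&argc,&argv,0,0);
      PetscLogTraceBegin(stdout);          // every PetscLogEventBegin/End() is now echoed
      // ... application code ...
      PetscFinalize();
*/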
374: /*@
375: PetscLogActions - Determines whether actions are logged for the graphical viewer.
377: Not Collective
379: Input Parameter:
380: . flag - PETSC_TRUE if actions are to be logged
382: Level: intermediate
384: Note: Logging of actions continues to consume more memory as the program
385: runs. Long running programs should consider turning this feature off.
387: Options Database Keys:
388: . -log_exclude_actions - Turns off actions logging
390: .keywords: log, stage, register
391: .seealso: PetscLogStagePush(), PetscLogStagePop()
392: @*/
393: PetscErrorCode PetscLogActions(PetscBool flag)
394: {
396: logActions = flag;
397: return(0);
398: }
402: /*@
403: PetscLogObjects - Determines whether objects are logged for the graphical viewer.
405: Not Collective
407: Input Parameter:
408: . flag - PETSC_TRUE if objects are to be logged
410: Level: intermediate
412: Note: Logging of objects continues to consume more memory as the program
413: runs. Long running programs should consider turning this feature off.
415: Options Database Keys:
416: . -log_exclude_objects - Turns off objects logging
418: .keywords: log, stage, register
419: .seealso: PetscLogStagePush(), PetscLogStagePop()
420: @*/
421: PetscErrorCode PetscLogObjects(PetscBool flag)
422: {
424: logObjects = flag;
425: return(0);
426: }
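/*
   Following the notes above, a minimal sketch for long-running programs that want the event
   timings from -log_summary but not the ever-growing action and object records:

      PetscInitialize(&argc,&argv,0,0);
      PetscLogActions(PETSC_FALSE);        // stop recording individual actions
      PetscLogObjects(PETSC_FALSE);        // stop recording object creations/destructions
      // ... long-running code ...
      PetscFinalize();
*/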
428: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
431: /*@C
 432:   PetscLogStageRegister - Attaches a character string name to a logging stage.
434: Not Collective
436: Input Parameter:
437: . sname - The name to associate with that stage
439: Output Parameter:
440: . stage - The stage number
442: Level: intermediate
444: .keywords: log, stage, register
445: .seealso: PetscLogStagePush(), PetscLogStagePop()
446: @*/
447: PetscErrorCode PetscLogStageRegister(const char sname[],PetscLogStage *stage)
448: {
449: PetscStageLog stageLog;
450: PetscLogEvent event;
454: PetscLogGetStageLog(&stageLog);
455: PetscStageLogRegister(stageLog, sname, stage);
456: /* Copy events already changed in the main stage, this sucks */
457: EventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents);
458: for (event = 0; event < stageLog->eventLog->numEvents; event++) {
459: EventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event],&stageLog->stageInfo[*stage].eventLog->eventInfo[event]);
460: }
461: ClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses);
462: return(0);
463: }
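/*
   A minimal sketch of registering and using two stages; the stage names "Assembly" and
   "Solve" are illustrative.

      PetscLogStage assembly, solve;
      PetscLogStageRegister("Assembly",&assembly);
      PetscLogStageRegister("Solve",&solve);

      PetscLogStagePush(assembly);
      // ... matrix and vector assembly ...
      PetscLogStagePop();

      PetscLogStagePush(solve);
      // ... linear or nonlinear solve ...
      PetscLogStagePop();
*/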
467: /*@C
468: PetscLogStagePush - This function pushes a stage on the stack.
470: Not Collective
472: Input Parameter:
473: . stage - The stage on which to log
475: Usage:
 476:   If the option -log_summary is used to run the program containing the
477: following code, then 2 sets of summary data will be printed during
478: PetscFinalize().
479: .vb
480: PetscInitialize(int *argc,char ***args,0,0);
481: [stage 0 of code]
482: PetscLogStagePush(1);
483: [stage 1 of code]
484: PetscLogStagePop();
485: PetscBarrier(...);
486: [more stage 0 of code]
487: PetscFinalize();
488: .ve
489:
490: Notes:
491: Use PetscLogStageRegister() to register a stage.
493: Level: intermediate
495: .keywords: log, push, stage
496: .seealso: PetscLogStagePop(), PetscLogStageRegister(), PetscBarrier()
497: @*/
498: PetscErrorCode PetscLogStagePush(PetscLogStage stage)
499: {
500: PetscStageLog stageLog;
504: PetscLogGetStageLog(&stageLog);
505: PetscStageLogPush(stageLog, stage);
506: return(0);
507: }
511: /*@C
512: PetscLogStagePop - This function pops a stage from the stack.
514: Not Collective
516: Usage:
 517:   If the option -log_summary is used to run the program containing the
518: following code, then 2 sets of summary data will be printed during
519: PetscFinalize().
520: .vb
521: PetscInitialize(int *argc,char ***args,0,0);
522: [stage 0 of code]
523: PetscLogStagePush(1);
524: [stage 1 of code]
525: PetscLogStagePop();
526: PetscBarrier(...);
527: [more stage 0 of code]
528: PetscFinalize();
529: .ve
531: Notes:
532: Use PetscLogStageRegister() to register a stage.
534: Level: intermediate
536: .keywords: log, pop, stage
537: .seealso: PetscLogStagePush(), PetscLogStageRegister(), PetscBarrier()
538: @*/
539: PetscErrorCode PetscLogStagePop(void)
540: {
541: PetscStageLog stageLog;
545: PetscLogGetStageLog(&stageLog);
546: PetscStageLogPop(stageLog);
547: return(0);
548: }
552: /*@
553: PetscLogStageSetActive - Determines stage activity for PetscLogEventBegin() and PetscLogEventEnd().
555: Not Collective
557: Input Parameters:
558: + stage - The stage
559: - isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)
561: Level: intermediate
563: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
564: @*/
565: PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive)
566: {
567: PetscStageLog stageLog;
571: PetscLogGetStageLog(&stageLog);
572: PetscStageLogSetActive(stageLog, stage, isActive);
573: return(0);
574: }
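/*
   A minimal sketch of silencing event logging inside one stage, for example a warm-up pass
   that should not pollute the timings; the stage name "Warm-up" is illustrative.

      PetscLogStage warmup;
      PetscLogStageRegister("Warm-up",&warmup);
      PetscLogStageSetActive(warmup,PETSC_FALSE);   // events in this stage are not logged
      PetscLogStagePush(warmup);
      // ... preloading / warm-up code ...
      PetscLogStagePop();
*/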
578: /*@
579: PetscLogStageGetActive - Returns stage activity for PetscLogEventBegin() and PetscLogEventEnd().
581: Not Collective
583: Input Parameter:
584: . stage - The stage
586: Output Parameter:
587: . isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)
589: Level: intermediate
591: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
592: @*/
593: PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive)
594: {
595: PetscStageLog stageLog;
599: PetscLogGetStageLog(&stageLog);
600: PetscStageLogGetActive(stageLog, stage, isActive);
601: return(0);
602: }
606: /*@
607: PetscLogStageSetVisible - Determines stage visibility in PetscLogView()
609: Not Collective
611: Input Parameters:
612: + stage - The stage
613: - isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)
615: Level: intermediate
617: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogView()
618: @*/
619: PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible)
620: {
621: PetscStageLog stageLog;
625: PetscLogGetStageLog(&stageLog);
626: PetscStageLogSetVisible(stageLog, stage, isVisible);
627: return(0);
628: }
632: /*@
633: PetscLogStageGetVisible - Returns stage visibility in PetscLogView()
635: Not Collective
637: Input Parameter:
638: . stage - The stage
640: Output Parameter:
641: . isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)
643: Level: intermediate
645: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogView()
646: @*/
647: PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible)
648: {
649: PetscStageLog stageLog;
653: PetscLogGetStageLog(&stageLog);
654: PetscStageLogGetVisible(stageLog, stage, isVisible);
655: return(0);
656: }
660: /*@C
661: PetscLogStageGetId - Returns the stage id when given the stage name.
663: Not Collective
665: Input Parameter:
666: . name - The stage name
668: Output Parameter:
669: . stage - The stage
671: Level: intermediate
673: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
674: @*/
675: PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage)
676: {
677: PetscStageLog stageLog;
681: PetscLogGetStageLog(&stageLog);
682: PetscStageLogGetStage(stageLog, name, stage);
683: return(0);
684: }
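/*
   A minimal sketch of looking a stage up by name; "Main Stage" is the stage registered
   automatically in PetscLogBegin_Private() above.

      PetscLogStage main_stage;
      PetscLogStageGetId("Main Stage",&main_stage);
      PetscLogStageSetVisible(main_stage,PETSC_FALSE);   // omit it from PetscLogView() output
*/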
686: /*------------------------------------------------ Event Functions --------------------------------------------------*/
689: /*@C
690: PetscLogEventRegister - Registers an event name for logging operations in an application code.
692: Not Collective
694: Input Parameter:
695: + name - The name associated with the event
 696: - classid - The classid associated with the class for this event; obtain one with
 697:            PetscClassIdRegister() or use a predefined one such as KSP_CLASSID or SNES_CLASSID
 698:            (the predefined classids are only available in C code)
699:
700: Output Parameter:
701: . event - The event id for use with PetscLogEventBegin() and PetscLogEventEnd().
703: Example of Usage:
704: .vb
705: PetscLogEvent USER_EVENT;
706: PetscClassId classid;
707: PetscLogDouble user_event_flops;
708: PetscClassIdRegister("class name",&classid);
709: PetscLogEventRegister("User event name",classid,&USER_EVENT);
710: PetscLogEventBegin(USER_EVENT,0,0,0,0);
711: [code segment to monitor]
712: PetscLogFlops(user_event_flops);
713: PetscLogEventEnd(USER_EVENT,0,0,0,0);
714: .ve
716: Notes:
717: PETSc automatically logs library events if the code has been
718: compiled with -DPETSC_USE_LOG (which is the default) and -log,
719: -log_summary, or -log_all are specified. PetscLogEventRegister() is
720: intended for logging user events to supplement this PETSc
721: information.
723: PETSc can gather data for use with the utilities Upshot/Nupshot
724: (part of the MPICH distribution). If PETSc has been compiled
725: with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
726: MPICH), the user can employ another command line option, -log_mpe,
 727:   to create a logfile, "mpe.log", which can be visualized with
728: Upshot/Nupshot.
730: The classid is associated with each event so that classes of events
731: can be disabled simultaneously, such as all matrix events. The user
732: can either use an existing classid, such as MAT_CLASSID, or create
733: their own as shown in the example.
735: Level: intermediate
737: .keywords: log, event, register
738: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogFlops(),
739: PetscLogEventMPEActivate(), PetscLogEventMPEDeactivate(),
740: PetscLogEventActivate(), PetscLogEventDeactivate(), PetscClassIdRegister()
741: @*/
742: PetscErrorCode PetscLogEventRegister(const char name[],PetscClassId classid,PetscLogEvent *event)
743: {
744: PetscStageLog stageLog;
745: int stage;
749: *event = PETSC_DECIDE;
750: PetscLogGetStageLog(&stageLog);
751: EventRegLogRegister(stageLog->eventLog, name, classid, event);
752: for (stage = 0; stage < stageLog->numStages; stage++) {
753: EventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents);
754: ClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
755: }
756: return(0);
757: }
761: /*@
762: PetscLogEventActivate - Indicates that a particular event should be logged.
764: Not Collective
766: Input Parameter:
767: . event - The event id
769: Usage:
770: .vb
771: PetscLogEventDeactivate(VEC_SetValues);
772: [code where you do not want to log VecSetValues()]
773: PetscLogEventActivate(VEC_SetValues);
774: [code where you do want to log VecSetValues()]
775: .ve
777: Note:
778: The event may be either a pre-defined PETSc event (found in include/petsclog.h)
779: or an event number obtained with PetscLogEventRegister().
781: Level: advanced
783: .keywords: log, event, activate
 784: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PetscLogEventDeactivate()
785: @*/
786: PetscErrorCode PetscLogEventActivate(PetscLogEvent event)
787: {
788: PetscStageLog stageLog;
789: int stage;
793: PetscLogGetStageLog(&stageLog);
794: PetscStageLogGetCurrent(stageLog, &stage);
795: EventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
796: return(0);
797: }
801: /*@
802: PetscLogEventDeactivate - Indicates that a particular event should not be logged.
804: Not Collective
806: Input Parameter:
807: . event - The event id
809: Usage:
810: .vb
811: PetscLogEventDeactivate(VEC_SetValues);
812: [code where you do not want to log VecSetValues()]
813: PetscLogEventActivate(VEC_SetValues);
814: [code where you do want to log VecSetValues()]
815: .ve
817: Note:
818: The event may be either a pre-defined PETSc event (found in
819: include/petsclog.h) or an event number obtained with PetscLogEventRegister()).
821: Level: advanced
823: .keywords: log, event, deactivate
 824: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PetscLogEventActivate()
825: @*/
826: PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event)
827: {
828: PetscStageLog stageLog;
829: int stage;
833: PetscLogGetStageLog(&stageLog);
834: PetscStageLogGetCurrent(stageLog, &stage);
835: EventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
836: return(0);
837: }
841: /*@
842: PetscLogEventSetActiveAll - Sets the event activity in every stage.
844: Not Collective
846: Input Parameters:
847: + event - The event id
848: - isActive - The activity flag determining whether the event is logged
850: Level: advanced
852: .keywords: log, event, activate
 853: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PetscLogEventActivate(),PetscLogEventDeactivate()
854: @*/
855: PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive)
856: {
857: PetscStageLog stageLog;
858: int stage;
862: PetscLogGetStageLog(&stageLog);
863: for(stage = 0; stage < stageLog->numStages; stage++) {
864: if (isActive) {
865: EventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
866: } else {
867: EventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
868: }
869: }
870: return(0);
871: }
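/*
   A minimal sketch of disabling one event in every stage at once (PetscLogEventDeactivate()
   only affects the current stage); USER_EVENT is illustrative.

      PetscLogEvent USER_EVENT;
      PetscLogEventRegister("User event",0,&USER_EVENT);
      PetscLogEventSetActiveAll(USER_EVENT,PETSC_FALSE);   // never log this event, in any stage
*/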
875: /*@
876: PetscLogEventActivateClass - Activates event logging for a PETSc object class.
878: Not Collective
880: Input Parameter:
881: . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc.
883: Level: developer
885: .keywords: log, event, activate, class
886: .seealso: PetscInfoActivate(),PetscInfo(),PetscInfoAllow(),PetscLogEventDeactivateClass(), PetscLogEventActivate(),PetscLogEventDeactivate()
887: @*/
888: PetscErrorCode PetscLogEventActivateClass(PetscClassId classid)
889: {
890: PetscStageLog stageLog;
891: int stage;
895: PetscLogGetStageLog(&stageLog);
896: PetscStageLogGetCurrent(stageLog, &stage);
897: EventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
898: return(0);
899: }
903: /*@
904: PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class.
906: Not Collective
908: Input Parameter:
909: . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc.
911: Level: developer
913: .keywords: log, event, deactivate, class
914: .seealso: PetscInfoActivate(),PetscInfo(),PetscInfoAllow(),PetscLogEventActivateClass(), PetscLogEventActivate(),PetscLogEventDeactivate()
915: @*/
916: PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid)
917: {
918: PetscStageLog stageLog;
919: int stage;
923: PetscLogGetStageLog(&stageLog);
924: PetscStageLogGetCurrent(stageLog, &stage);
925: EventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
926: return(0);
927: }
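/*
   A minimal sketch of switching logging off and back on for a whole class of events in the
   current stage, here all matrix events:

      PetscLogEventDeactivateClass(MAT_CLASSID);
      // ... matrix operations that should not appear in the log ...
      PetscLogEventActivateClass(MAT_CLASSID);
*/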
929: /*MC
930: PetscLogEventBegin - Logs the beginning of a user event.
932: Synopsis:
933: PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,
934: PetscObject o4)
936: Not Collective
938: Input Parameters:
939: + e - integer associated with the event obtained from PetscLogEventRegister()
940: - o1,o2,o3,o4 - objects associated with the event, or 0
943: Fortran Synopsis:
944: void PetscLogEventBegin(int e,PetscErrorCode ierr)
946: Usage:
947: .vb
948: PetscLogEvent USER_EVENT;
949: PetscLogDouble user_event_flops;
950: PetscLogEventRegister("User event",0,&USER_EVENT);
951: PetscLogEventBegin(USER_EVENT,0,0,0,0);
952: [code segment to monitor]
953: PetscLogFlops(user_event_flops);
954: PetscLogEventEnd(USER_EVENT,0,0,0,0);
955: .ve
957: Notes:
958: You need to register each integer event with the command
959: PetscLogEventRegister(). The source code must be compiled with
960: -DPETSC_USE_LOG, which is the default.
962: PETSc automatically logs library events if the code has been
963: compiled with -DPETSC_USE_LOG, and -log, -log_summary, or -log_all are
964: specified. PetscLogEventBegin() is intended for logging user events
965: to supplement this PETSc information.
967: Level: intermediate
969: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops()
971: .keywords: log, event, begin
972: M*/
974: /*MC
975: PetscLogEventEnd - Log the end of a user event.
977: Synopsis:
978: PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,
979: PetscObject o4)
981: Not Collective
983: Input Parameters:
984: + e - integer associated with the event obtained with PetscLogEventRegister()
985: - o1,o2,o3,o4 - objects associated with the event, or 0
988: Fortran Synopsis:
989: void PetscLogEventEnd(int e,PetscErrorCode ierr)
991: Usage:
992: .vb
993: PetscLogEvent USER_EVENT;
994: PetscLogDouble user_event_flops;
 995:      PetscLogEventRegister("User event",0,&USER_EVENT);
996: PetscLogEventBegin(USER_EVENT,0,0,0,0);
997: [code segment to monitor]
998: PetscLogFlops(user_event_flops);
999: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1000: .ve
1002: Notes:
1003: You should also register each additional integer event with the command
1004: PetscLogEventRegister(). Source code must be compiled with
1005: -DPETSC_USE_LOG, which is the default.
1007: PETSc automatically logs library events if the code has been
1008: compiled with -DPETSC_USE_LOG, and -log, -log_summary, or -log_all are
1009: specified. PetscLogEventEnd() is intended for logging user events
1010: to supplement this PETSc information.
1012: Level: intermediate
1014: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogFlops()
1016: .keywords: log, event, end
1017: M*/
1019: /*MC
1020: PetscLogEventBarrierBegin - Logs the time in a barrier before an event.
1022: Synopsis:
1023: PetscErrorCode PetscLogEventBarrierBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,
1024: PetscObject o4,MPI_Comm comm)
1026: Not Collective
1028: Input Parameters:
1029: . e - integer associated with the event obtained from PetscLogEventRegister()
1030: . o1,o2,o3,o4 - objects associated with the event, or 0
1031: . comm - communicator the barrier takes place over
1034: Usage:
1035: .vb
1036: PetscLogEventBarrierBegin(VEC_NormBarrier,0,0,0,0,comm);
1037: MPI_Allreduce()
1038: PetscLogEventBarrierEnd(VEC_NormBarrier,0,0,0,0,comm);
1039: .ve
1041: Notes:
1042: This is for logging the amount of time spent in a barrier for an event
1043: that requires synchronization.
1045: Additional Notes:
1046: Synchronization events always come in pairs; for example, VEC_NormBarrier and
1047: VEC_NormComm = VEC_NormBarrier + 1
1049: Level: advanced
1051: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops(), PetscLogEventBegin(),
1052: PetscLogEventBarrierEnd()
1054: .keywords: log, event, begin, barrier
1055: M*/
1057: /*MC
1058: PetscLogEventBarrierEnd - Logs the time in a barrier before an event.
1060: Synopsis:
1061: PetscErrorCode PetscLogEventBarrierEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,
1062: PetscObject o4,MPI_Comm comm)
1064: Logically Collective on MPI_Comm
1066: Input Parameters:
1067: . e - integer associated with the event obtained from PetscLogEventRegister()
1068: . o1,o2,o3,o4 - objects associated with the event, or 0
1069: . comm - communicator the barrier takes place over
1072: Usage:
1073: .vb
1074: PetscLogEventBarrierBegin(VEC_NormBarrier,0,0,0,0,comm);
1075: MPI_Allreduce()
1076: PetscLogEventBarrierEnd(VEC_NormBarrier,0,0,0,0,comm);
1077: .ve
1079: Notes:
1080: This is for logging the amount of time spent in a barrier for an event
1081: that requires synchronization.
1083: Additional Notes:
1084: Synchronization events always come in pairs; for example, VEC_NormBarrier and
1085: VEC_NormComm = VEC_NormBarrier + 1
1087: Level: advanced
1089: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops(), PetscLogEventBegin(),
1090: PetscLogEventBarrierBegin()
1092: .keywords: log, event, begin, barrier
1093: M*/
1097: /*@C
1098: PetscLogEventGetId - Returns the event id when given the event name.
1100: Not Collective
1102: Input Parameter:
1103: . name - The event name
1105: Output Parameter:
1106: . event - The event
1108: Level: intermediate
1110: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogStageGetId()
1111: @*/
1112: PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event)
1113: {
1114: PetscStageLog stageLog;
1118: PetscLogGetStageLog(&stageLog);
1119: EventRegLogGetEvent(stageLog->eventLog, name, event);
1120: return(0);
1121: }
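/*
   A minimal sketch of looking an event up by its registered name and deactivating it;
   "VecNorm" assumes an event was registered under that name (as the Vec package does).

      PetscLogEvent e;
      PetscLogEventGetId("VecNorm",&e);
      PetscLogEventDeactivate(e);     // stop logging it in the current stage
*/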
1124: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1127: /*@C
 1128:   PetscLogDump - Dumps logs of objects to a file. This file was intended to
 1129:   be read by bin/petscview, a program that no longer exists.
1131: Collective on PETSC_COMM_WORLD
1133: Input Parameter:
1134: . name - an optional file name
1136: Options Database Keys:
1137: + -log - Prints basic log information (for code compiled with PETSC_USE_LOG)
1138: - -log_all - Prints extensive log information (for code compiled with PETSC_USE_LOG)
1139:
1140: Usage:
1141: .vb
1142: PetscInitialize(...);
1143: PetscLogBegin(); or PetscLogAllBegin();
1144: ... code ...
1145: PetscLogDump(filename);
1146: PetscFinalize();
1147: .ve
1149: Notes:
1150: The default file name is
1151: $ Log.<rank>
1152: where <rank> is the processor number. If no name is specified,
1153: this file will be used.
1155: Level: advanced
1157: .keywords: log, dump
1158: .seealso: PetscLogBegin(), PetscLogAllBegin(), PetscLogView()
1159: @*/
1160: PetscErrorCode PetscLogDump(const char sname[])
1161: {
1162: PetscStageLog stageLog;
1163: PetscEventPerfInfo *eventInfo;
1164: FILE *fd;
1165: char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1166: PetscLogDouble flops, _TotalTime;
1167: PetscMPIInt rank;
1168: int action, object, curStage;
1169: PetscLogEvent event;
1170: PetscErrorCode ierr;
1173: /* Calculate the total elapsed time */
1174: PetscTime(_TotalTime);
1175: _TotalTime -= BaseTime;
1176: /* Open log file */
1177: MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
1178: if (sname) {
1179: sprintf(file, "%s.%d", sname, rank);
1180: } else {
1181: sprintf(file, "Log.%d", rank);
1182: }
1183: PetscFixFilename(file, fname);
1184: PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd);
1185: if ((!rank) && (!fd)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
1186: /* Output totals */
1187: PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flops %14e %16.8e\n", _TotalFlops, _TotalTime);
1188: PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0);
1189: /* Output actions */
1190: if (logActions) {
1191: PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", numActions);
1192: for(action = 0; action < numActions; action++) {
1193: PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n",
1194: actions[action].time, actions[action].action, (int)actions[action].event, (int)actions[action].classid, actions[action].id1,
1195: actions[action].id2, actions[action].id3, actions[action].flops, actions[action].mem, actions[action].maxmem);
1196: }
1197: }
1198: /* Output objects */
1199: if (logObjects) {
1200: PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", numObjects, numObjectsDestroyed);
1201: for(object = 0; object < numObjects; object++) {
1202: PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", objects[object].parent, (int) objects[object].mem);
1203: if (!objects[object].name[0]) {
1204: PetscFPrintf(PETSC_COMM_WORLD, fd,"No Name\n");
1205: } else {
1206: PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", objects[object].name);
1207: }
 1208:       if (!objects[object].info[0]) {
1209: PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n");
1210: } else {
1211: PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", objects[object].info);
1212: }
1213: }
1214: }
1215: /* Output events */
1216: PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n");
1217: PetscLogGetStageLog(&stageLog);
1218: PetscIntStackTop(stageLog->stack, &curStage);
1219: eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1220: for(event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1221: if (eventInfo[event].time != 0.0) {
1222: flops = eventInfo[event].flops/eventInfo[event].time;
1223: } else {
1224: flops = 0.0;
1225: }
1226: PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count,
1227: eventInfo[event].flops, eventInfo[event].time, flops);
1228: }
1229: PetscFClose(PETSC_COMM_WORLD, fd);
1230: return(0);
1231: }
1235: /*@C
 1236:   PetscLogView - Prints a summary of the logging.
1238: Collective over MPI_Comm
1240: Input Parameter:
1241: . viewer - an ASCII viewer
1243: Options Database Keys:
1244: . -log_summary - Prints summary of log information (for code compiled with PETSC_USE_LOG)
1246: Usage:
1247: .vb
1248: PetscInitialize(...);
1249: PetscLogBegin();
1250: ... code ...
 1251:       PetscLogView(viewer);
1252: PetscFinalize(...);
1253: .ve
1255: Notes:
1256: By default the summary is printed to stdout.
1258: Level: beginner
1259:
1260: .keywords: log, dump, print
1261: .seealso: PetscLogBegin(), PetscLogDump()
1262: @*/
1263: PetscErrorCode PetscLogView(PetscViewer viewer)
1264: {
1265: FILE *fd;
1266: PetscLogDouble zero = 0.0;
1267: PetscStageLog stageLog;
1268: PetscStageInfo *stageInfo = PETSC_NULL;
1269: PetscEventPerfInfo *eventInfo = PETSC_NULL;
1270: PetscClassPerfInfo *classInfo;
1271: char arch[10], hostname[64], username[16], pname[PETSC_MAX_PATH_LEN], date[64];
1272: const char *name;
1273: PetscLogDouble locTotalTime, TotalTime, TotalFlops;
1274: PetscLogDouble numMessages, messageLength, avgMessLen, numReductions;
1275: PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red;
1276: PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1277: PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1278: PetscLogDouble min, max, tot, ratio, avg, x, y;
1279: PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratCt, totm, totml, totr;
1280: PetscMPIInt minCt, maxCt;
1281: PetscMPIInt size, rank;
1282: PetscBool *localStageUsed, *stageUsed;
1283: PetscBool *localStageVisible, *stageVisible;
1284: int numStages, localNumEvents, numEvents;
1285: int stage, lastStage, oclass;
1286: PetscLogEvent event;
1287: PetscErrorCode ierr;
1288: char version[256];
1289: MPI_Comm comm;
1292: PetscObjectGetComm((PetscObject)viewer,&comm);
1293: PetscViewerASCIIGetPointer(viewer,&fd);
1294: MPI_Comm_size(comm, &size);
1295: MPI_Comm_rank(comm, &rank);
1296: /* Pop off any stages the user forgot to remove */
1297: lastStage = 0;
1298: PetscLogGetStageLog(&stageLog);
1299: PetscStageLogGetCurrent(stageLog, &stage);
1300: while (stage >= 0) {
1301: lastStage = stage;
1302: PetscStageLogPop(stageLog);
1303: PetscStageLogGetCurrent(stageLog, &stage);
1304: }
1305: /* Get the total elapsed time */
1306: PetscTime(locTotalTime); locTotalTime -= BaseTime;
1308: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1309: PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n");
1310: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1311: PetscFPrintf(comm, fd, "\n---------------------------------------------- PETSc Performance Summary: ----------------------------------------------\n\n");
1312: PetscGetArchType(arch, 10);
1313: PetscGetHostName(hostname, 64);
1314: PetscGetUserName(username, 16);
1315: PetscGetProgramName(pname, PETSC_MAX_PATH_LEN);
1316: PetscGetDate(date, 64);
1317: PetscGetVersion(version,256);
1318: if (size == 1) {
1319: PetscFPrintf(comm,fd,"%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date);
1320: } else {
1321: PetscFPrintf(comm,fd,"%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date);
1322: }
1323: PetscFPrintf(comm, fd, "Using %s\n", version);
1325: /* Must preserve reduction count before we go on */
1326: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1328: /* Calculate summary information */
1329: PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total \n");
1330: /* Time */
1331: MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1332: MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1333: MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1334: avg = (tot)/((PetscLogDouble) size);
1335: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1336: PetscFPrintf(comm, fd, "Time (sec): %5.3e %10.5f %5.3e\n", max, ratio, avg);
1337: TotalTime = tot;
1338: /* Objects */
1339: avg = (PetscLogDouble) numObjects;
1340: MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1341: MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1342: MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1343: avg = (tot)/((PetscLogDouble) size);
1344: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1345: PetscFPrintf(comm, fd, "Objects: %5.3e %10.5f %5.3e\n", max, ratio, avg);
1346: /* Flops */
1347: MPI_Allreduce(&_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1348: MPI_Allreduce(&_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1349: MPI_Allreduce(&_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1350: avg = (tot)/((PetscLogDouble) size);
1351: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1352: PetscFPrintf(comm, fd, "Flops: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1353: TotalFlops = tot;
1354: /* Flops/sec -- Must talk to Barry here */
1355: if (locTotalTime != 0.0) flops = _TotalFlops/locTotalTime; else flops = 0.0;
1356: MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1357: MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1358: MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1359: avg = (tot)/((PetscLogDouble) size);
1360: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1361: PetscFPrintf(comm, fd, "Flops/sec: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1362: /* Memory */
1363: PetscMallocGetMaximumUsage(&mem);
1364: if (mem > 0.0) {
1365: MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1366: MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1367: MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1368: avg = (tot)/((PetscLogDouble) size);
1369: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1370: PetscFPrintf(comm, fd, "Memory: %5.3e %10.5f %5.3e\n", max, ratio, tot);
1371: }
1372: /* Messages */
1373: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1374: MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1375: MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1376: MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1377: avg = (tot)/((PetscLogDouble) size);
1378: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1379: PetscFPrintf(comm, fd, "MPI Messages: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1380: numMessages = tot;
1381: /* Message Lengths */
1382: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1383: MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1384: MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1385: MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1386: if (numMessages != 0) avg = (tot)/(numMessages); else avg = 0.0;
1387: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1388: PetscFPrintf(comm, fd, "MPI Message Lengths: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1389: messageLength = tot;
1390: /* Reductions */
1391: MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1392: MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1393: MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1394: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1395: PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %10.5f\n", max, ratio);
1396: numReductions = red; /* wrong because uses count from process zero */
1397: PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n");
1398: PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n");
1399: PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n");
1401: /* Get total number of stages --
1402: Currently, a single processor can register more stages than another, but stages must all be registered in order.
 1403:      We can remove this requirement if necessary by having a global stage numbering and indirection on the stage ID.
 1404:      This seems best accomplished by associating a communicator with each stage.
1405: */
1406: MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1407: PetscMalloc(numStages * sizeof(PetscBool), &localStageUsed);
1408: PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
1409: PetscMalloc(numStages * sizeof(PetscBool), &localStageVisible);
1410: PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
1411: if (numStages > 0) {
1412: stageInfo = stageLog->stageInfo;
1413: for(stage = 0; stage < numStages; stage++) {
1414: if (stage < stageLog->numStages) {
1415: localStageUsed[stage] = stageInfo[stage].used;
1416: localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1417: } else {
1418: localStageUsed[stage] = PETSC_FALSE;
1419: localStageVisible[stage] = PETSC_TRUE;
1420: }
1421: }
1422: MPI_Allreduce(localStageUsed, stageUsed, numStages, MPI_INT, MPI_LOR, comm);
1423: MPI_Allreduce(localStageVisible, stageVisible, numStages, MPI_INT, MPI_LAND, comm);
1424: for(stage = 0; stage < numStages; stage++) {
1425: if (stageUsed[stage]) {
1426: PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --\n");
1427: PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total counts %%Total Avg %%Total counts %%Total \n");
1428: break;
1429: }
1430: }
1431: for(stage = 0; stage < numStages; stage++) {
1432: if (!stageUsed[stage]) continue;
1433: if (localStageUsed[stage]) {
1434: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1435: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1436: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1437: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1438: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1439: name = stageInfo[stage].name;
1440: } else {
1441: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1442: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1443: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1444: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1445: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1446: name = "";
1447: }
1448: mess *= 0.5; messLen *= 0.5; red /= size;
1449: if (TotalTime != 0.0) fracTime = stageTime/TotalTime; else fracTime = 0.0;
1450: if (TotalFlops != 0.0) fracFlops = flops/TotalFlops; else fracFlops = 0.0;
1451: /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */
1452: if (numMessages != 0.0) fracMessages = mess/numMessages; else fracMessages = 0.0;
1453: if (numMessages != 0.0) avgMessLen = messLen/numMessages; else avgMessLen = 0.0;
1454: if (messageLength != 0.0) fracLength = messLen/messageLength; else fracLength = 0.0;
1455: if (numReductions != 0.0) fracReductions = red/numReductions; else fracReductions = 0.0;
1456: PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% \n",
1457: stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
1458: mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);
1459: }
1460: }
1462: PetscFPrintf(comm, fd,
1463: "\n------------------------------------------------------------------------------------------------------------------------\n");
1464:
1465: PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n");
1466: PetscFPrintf(comm, fd, "Phase summary info:\n");
1467: PetscFPrintf(comm, fd, " Count: number of times phase was executed\n");
1468: PetscFPrintf(comm, fd, " Time and Flops: Max - maximum over all processors\n");
1469: PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n");
1470: PetscFPrintf(comm, fd, " Mess: number of messages sent\n");
1471: PetscFPrintf(comm, fd, " Avg. len: average message length\n");
1472: PetscFPrintf(comm, fd, " Reduct: number of global reductions\n");
1473: PetscFPrintf(comm, fd, " Global: entire computation\n");
1474: PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n");
1475: PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flops in this phase\n");
1476: PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n");
1477: PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n");
 1478:   PetscFPrintf(comm, fd, "      Total Mflop/s: 1e-6 * (sum of flops over all processors)/(max time over all processors)\n");
1479: PetscFPrintf(comm, fd,
1480: "------------------------------------------------------------------------------------------------------------------------\n");
1481:
1483: #if defined(PETSC_USE_DEBUG)
1484: PetscFPrintf(comm, fd, "\n\n");
1485: PetscFPrintf(comm, fd, " ##########################################################\n");
1486: PetscFPrintf(comm, fd, " # #\n");
1487: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1488: PetscFPrintf(comm, fd, " # #\n");
1489: PetscFPrintf(comm, fd, " # This code was compiled with a debugging option, #\n");
1490: PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n");
1491: PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n");
1492: PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n");
1493: PetscFPrintf(comm, fd, " # #\n");
1494: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1495: #endif
1496: #if defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_FORTRAN_KERNELS)
1497: PetscFPrintf(comm, fd, "\n\n");
1498: PetscFPrintf(comm, fd, " ##########################################################\n");
1499: PetscFPrintf(comm, fd, " # #\n");
1500: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1501: PetscFPrintf(comm, fd, " # #\n");
1502: PetscFPrintf(comm, fd, " # The code for various complex numbers numerical #\n");
1503: PetscFPrintf(comm, fd, " # kernels uses C++, which generally is not well #\n");
1504: PetscFPrintf(comm, fd, " # optimized. For performance that is about 4-5 times #\n");
1505: PetscFPrintf(comm, fd, " # faster, specify --with-fortran-kernels=1 #\n");
1506: PetscFPrintf(comm, fd, " # when running ./configure.py. #\n");
1507: PetscFPrintf(comm, fd, " # #\n");
1508: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1509: #endif
1511: /* Report events */
1512: PetscFPrintf(comm, fd,
1513: "Event Count Time (sec) Flops --- Global --- --- Stage --- Total\n");
1514:
1515: PetscFPrintf(comm, fd,
1516: " Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s\n");
1517:
1518: PetscFPrintf(comm,fd,
1519: "------------------------------------------------------------------------------------------------------------------------\n");
1521:
1522: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1523: for(stage = 0; stage < numStages; stage++) {
1524: if (!stageVisible[stage]) continue;
1525: if (localStageUsed[stage]) {
1526: PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1527: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1528: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1529: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1530: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1531: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1532: } else {
1533: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1534: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1535: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1536: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1537: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1538: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1539: }
1540: mess *= 0.5; messLen *= 0.5; red /= size;
1542: /* Get total number of events in this stage --
1543: Currently, a single processor can register more events than another, but events must all be registered in order,
 1544:        just like stages. We can remove this requirement if necessary by having a global event numbering and indirection
 1545:        on the event ID. This seems best accomplished by associating a communicator with each stage.
1547: Problem: If the event did not happen on proc 1, its name will not be available.
1548: Problem: Event visibility is not implemented
1549: */
1550: if (localStageUsed[stage]) {
1551: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1552: localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1553: } else {
1554: localNumEvents = 0;
1555: }
1556: MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1557: for(event = 0; event < numEvents; event++) {
1558: if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1559: if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) {
1560: flopr = eventInfo[event].flops;
1561: } else {
1562: flopr = 0.0;
1563: }
1564: MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1565: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1566: MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1567: MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1568: MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1569: MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1570: MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1571: MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1572: MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1573: MPI_Allreduce(&eventInfo[event].count, &minCt, 1, MPI_INT, MPI_MIN, comm);
1574: MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, comm);
1575: name = stageLog->eventLog->eventInfo[event].name;
1576: } else {
1577: flopr = 0.0;
1578: MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1579: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1580: MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1581: MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1582: MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1583: MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1584: MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1585: MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1586: MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1587: MPI_Allreduce(&ierr, &minCt, 1, MPI_INT, MPI_MIN, comm);
1588: MPI_Allreduce(&ierr, &maxCt, 1, MPI_INT, MPI_MAX, comm);
1589: name = "";
1590: }
1591: if (mint < 0.0) {
 1592:         PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose timers cannot resolve very rapid calls;\n artificially changing minimum to zero.\n",mint,name);
1593: mint = 0;
1594: }
1595: if (minf < 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Minimum flops %g over all processors for %s is negative! Not possible!",minf,name);
1596: totm *= 0.5; totml *= 0.5; totr /= size;
1597:
1598: if (maxCt != 0) {
1599: if (minCt != 0) ratCt = ((PetscLogDouble) maxCt)/minCt; else ratCt = 0.0;
1600: if (mint != 0.0) ratt = maxt/mint; else ratt = 0.0;
1601: if (minf != 0.0) ratf = maxf/minf; else ratf = 0.0;
1602: if (TotalTime != 0.0) fracTime = tott/TotalTime; else fracTime = 0.0;
1603: if (TotalFlops != 0.0) fracFlops = totf/TotalFlops; else fracFlops = 0.0;
1604: if (stageTime != 0.0) fracStageTime = tott/stageTime; else fracStageTime = 0.0;
1605: if (flops != 0.0) fracStageFlops = totf/flops; else fracStageFlops = 0.0;
1606: if (numMessages != 0.0) fracMess = totm/numMessages; else fracMess = 0.0;
1607: if (messageLength != 0.0) fracMessLen = totml/messageLength; else fracMessLen = 0.0;
1608: if (numReductions != 0.0) fracRed = totr/numReductions; else fracRed = 0.0;
1609: if (mess != 0.0) fracStageMess = totm/mess; else fracStageMess = 0.0;
1610: if (messLen != 0.0) fracStageMessLen = totml/messLen; else fracStageMessLen = 0.0;
1611: if (red != 0.0) fracStageRed = totr/red; else fracStageRed = 0.0;
1612: if (totm != 0.0) totml /= totm; else totml = 0.0;
1613: if (maxt != 0.0) flopr = totf/maxt; else flopr = 0.0;
1614: PetscFPrintf(comm, fd,
1615: "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f\n",
1616: name, maxCt, ratCt, maxt, ratt, maxf, ratf, totm, totml, totr,
1617: 100.0*fracTime, 100.0*fracFlops, 100.0*fracMess, 100.0*fracMessLen, 100.0*fracRed,
1618: 100.0*fracStageTime, 100.0*fracStageFlops, 100.0*fracStageMess, 100.0*fracStageMessLen, 100.0*fracStageRed,
1619: flopr/1.0e6);
1620: }
1621: }
1622: }
1624: /* Memory usage and object creation */
1625: PetscFPrintf(comm, fd,
1626: "------------------------------------------------------------------------------------------------------------------------\n");
1627: PetscFPrintf(comm, fd, "\n");
1628: PetscFPrintf(comm, fd, "Memory usage is given in bytes:\n\n");
1630: /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1631: the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1632: stats for stages local to processor sets.
1633: */
1634: /* We should figure out the longest object name here (now 20 characters) */
1635: PetscFPrintf(comm, fd, "Object Type Creations Destructions Memory Descendants' Mem.\n");
1636: PetscFPrintf(comm, fd, "Reports information only for process 0.\n");
1637: for(stage = 0; stage < numStages; stage++) {
1638: if (localStageUsed[stage]) {
1639: classInfo = stageLog->stageInfo[stage].classLog->classInfo;
1640: PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1641: for(oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
1642: if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
1643: PetscFPrintf(comm, fd, "%20s %5d %5d %11.0f %g\n", stageLog->classLog->classInfo[oclass].name,
1644: classInfo[oclass].creations, classInfo[oclass].destructions, classInfo[oclass].mem,
1645: classInfo[oclass].descMem);
1646: }
1647: }
1648: } else {
1649: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1650: }
1651: }
1653: PetscFree(localStageUsed);
1654: PetscFree(stageUsed);
1655: PetscFree(localStageVisible);
1656: PetscFree(stageVisible);
1658: /* Information unrelated to this particular run */
1659: PetscFPrintf(comm, fd,
1660: "========================================================================================================================\n");
1661: PetscTime(y);
1662: PetscTime(x);
1663: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
1664: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
1665: PetscFPrintf(comm,fd,"Average time to get PetscTime(): %g\n", (y-x)/10.0);
1666: /* MPI information */
1667: if (size > 1) {
1668: MPI_Status status;
1669: PetscMPIInt tag;
1670: MPI_Comm newcomm;
1672: MPI_Barrier(comm);
1673: PetscTime(x);
1674: MPI_Barrier(comm);
1675: MPI_Barrier(comm);
1676: MPI_Barrier(comm);
1677: MPI_Barrier(comm);
1678: MPI_Barrier(comm);
1679: PetscTime(y);
1680: PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y-x)/5.0);
1681: PetscCommDuplicate(comm,&newcomm, &tag);
1682: MPI_Barrier(comm);
1683: if (rank) {
1684: MPI_Recv(0, 0, MPI_INT, rank-1, tag, newcomm, &status);
1685: MPI_Send(0, 0, MPI_INT, (rank+1)%size, tag, newcomm);
1686: } else {
1687: PetscTime(x);
1688: MPI_Send(0, 0, MPI_INT, 1, tag, newcomm);
1689: MPI_Recv(0, 0, MPI_INT, size-1, tag, newcomm, &status);
1690: PetscTime(y);
1691: PetscFPrintf(comm,fd,"Average time for zero size MPI_Send(): %g\n", (y-x)/size);
1692: }
1693: PetscCommDestroy(&newcomm);
1694: }
1695: PetscOptionsView(viewer);
1697: /* Machine and compile information */
1698: #if defined(PETSC_USE_FORTRAN_KERNELS)
1699: PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n");
1700: #else
1701: PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n");
1702: #endif
1703: #if defined(PETSC_USE_REAL_SINGLE)
1704: PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n");
1705: #elif defined(PETSC_USE_LONGDOUBLE)
1706: PetscFPrintf(comm, fd, "Compiled with long double precision PetscScalar and PetscReal\n");
1707: #endif
1709: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1710: PetscFPrintf(comm, fd, "Compiled with single precision matrices\n");
1711: #else
1712: PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n");
1713: #endif
1714: PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d\n",
1715: (int) sizeof(short), (int) sizeof(int), (int) sizeof(long), (int) sizeof(void*),(int) sizeof(PetscScalar));
1717: PetscFPrintf(comm, fd, "Configure run at: %s\n",petscconfigureruntime);
1718: PetscFPrintf(comm, fd, "Configure options: %s",petscconfigureoptions);
1719: PetscFPrintf(comm, fd, "%s", petscmachineinfo);
1720: PetscFPrintf(comm, fd, "%s", petsccompilerinfo);
1721: PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo);
1722: PetscFPrintf(comm, fd, "%s", petsclinkerinfo);
1724: /* Cleanup */
1725: PetscFPrintf(comm, fd, "\n");
1726: PetscStageLogPush(stageLog, lastStage);
1727: return(0);
1728: }
1732: /*@C
1733: PetscLogPrintDetailed - Each process prints the times for its own events
1735: Collective over MPI_Comm
1737: Input Parameter:
1738: + comm - The MPI communicator (only one processor prints output)
1739: - file - [Optional] The output file name
1741: Options Database Keys:
1742: . -log_summary_detailed - Prints summary of log information (for code compiled with PETSC_USE_LOG)
1744: Usage:
1745: .vb
1746: PetscInitialize(...);
1747: PetscLogBegin();
1748: ... code ...
1749: PetscLogPrintDetailed(MPI_Comm,filename);
1750: PetscFinalize(...);
1751: .ve
1753: Notes:
1754: By default the summary is printed to stdout.
1756: Level: beginner
1757:
1758: .keywords: log, dump, print
1759: .seealso: PetscLogBegin(), PetscLogDump(), PetscLogView()
1760: @*/
1761: PetscErrorCode PetscLogPrintDetailed(MPI_Comm comm, const char filename[])
1762: {
1763: FILE *fd = PETSC_STDOUT;
1764: PetscStageLog stageLog;
1765: PetscStageInfo *stageInfo = PETSC_NULL;
1766: PetscEventPerfInfo *eventInfo = PETSC_NULL;
1767: const char *name = PETSC_NULL;
1768: PetscLogDouble TotalTime;
1769: PetscLogDouble stageTime, flops, flopr, mess, messLen, red;
1770: PetscLogDouble maxf, totf, maxt, tott, totm, totml, totr = 0.0;
1771: PetscMPIInt maxCt;
1772: PetscBool *stageUsed;
1773: PetscBool *stageVisible;
1774: int numStages, numEvents;
1775: int stage;
1776: PetscLogEvent event;
1777: PetscErrorCode ierr;
1780: /* Pop off any stages the user forgot to remove */
1781: PetscLogGetStageLog(&stageLog);
1782: PetscStageLogGetCurrent(stageLog, &stage);
1783: while (stage >= 0) {
1784: PetscStageLogPop(stageLog);
1785: PetscStageLogGetCurrent(stageLog, &stage);
1786: }
1787: /* Get the total elapsed time */
1788: PetscTime(TotalTime); TotalTime -= BaseTime;
1789: /* Open the summary file */
1790: if (filename) {
1791: PetscFOpen(comm, filename, "w", &fd);
1792: }
1794: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1795: PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n");
1796: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1799: numStages = stageLog->numStages;
1800: PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
1801: PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
1802: if (numStages > 0) {
1803: stageInfo = stageLog->stageInfo;
1804: for(stage = 0; stage < numStages; stage++) {
1805: if (stage < stageLog->numStages) {
1806: stageUsed[stage] = stageInfo[stage].used;
1807: stageVisible[stage] = stageInfo[stage].perfInfo.visible;
1808: } else {
1809: stageUsed[stage] = PETSC_FALSE;
1810: stageVisible[stage] = PETSC_TRUE;
1811: }
1812: }
1813: }
1815: /* Report events */
1816: PetscFPrintf(comm, fd,"Event Count Time (sec) Flops/sec \n");
1817: PetscFPrintf(comm, fd," Mess Avg len Reduct \n");
1818: PetscFPrintf(comm,fd,"-----------------------------------------------------------------------------------\n");
1819: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1820: for(stage = 0; stage < numStages; stage++) {
1821: if (!stageVisible[stage]) continue;
1822: if (stageUsed[stage]) {
1823: PetscSynchronizedFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1824: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1825: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1826: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1827: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1828: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1829: }
1830: mess *= 0.5; messLen *= 0.5;
1832: /* Get total number of events in this stage --
1833: */
1834: if (stageUsed[stage]) {
1835: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1836: numEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1837: } else {
1838: numEvents = 0;
1839: }
1840: for(event = 0; event < numEvents; event++) {
1841: if (stageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents)) {
1842: if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) {
1843: flopr = eventInfo[event].flops/eventInfo[event].time;
1844: } else {
1845: flopr = 0.0;
1846: }
1847: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PETSC_COMM_SELF);
1848: MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1849: MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PETSC_COMM_SELF);
1850: MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1851: MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1852: MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1853: totr = eventInfo[event].numReductions;
1854: MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, PETSC_COMM_SELF);
1855: name = stageLog->eventLog->eventInfo[event].name;
1856: totm *= 0.5; totml *= 0.5;
1857: }
1858:
1859: if (maxCt != 0) {
1860: if (totm != 0.0) totml /= totm; else totml = 0.0;
1861: PetscSynchronizedFPrintf(comm, fd,"%-16s %7d %5.4e %3.2e %2.1e %2.1e %2.1e\n",name, maxCt, maxt, maxf, totm, totml, totr);
1862: }
1863: }
1864: }
1865: PetscSynchronizedFlush(comm);
1867: PetscFree(stageUsed);
1868: PetscFree(stageVisible);
1870: PetscFClose(comm, fd);
1871: return(0);
1872: }
1874: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
1877: /*@C
1878: PetscGetFlops - Returns the number of flops used on this processor
1879: since the program began.
1881: Not Collective
1883: Output Parameter:
1884: .  flops - number of floating point operations
1886: Notes:
1887: A global counter logs all PETSc flop counts. The user can use
1888: PetscLogFlops() to increment this counter to include flops for the
1889: application code.
1891: PETSc automatically logs library events if the code has been
1892: compiled with -DPETSC_USE_LOG (which is the default), and -log,
1893: -log_summary, or -log_all are specified. PetscLogFlops() is
1894: intended for logging user flops to supplement this PETSc
1895: information.
1897: Level: intermediate
1899: .keywords: log, flops, floating point operations
1901: .seealso: PetscGetTime(), PetscLogFlops()
1902: @*/
1903: PetscErrorCode PetscGetFlops(PetscLogDouble *flops)
1904: {
1906: *flops = _TotalFlops;
1907: return(0);
1908: }
1912: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
1913: {
1915: size_t fullLength;
1916: va_list Argp;
1919: if (!logObjects) return(0);
1920: va_start(Argp, format);
1921: PetscVSNPrintf(objects[obj->id].info, 64,format,&fullLength, Argp);
1922: va_end(Argp);
1923: return(0);
1924: }
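/* PetscLogObjectState() attaches a printf-style description (truncated to 64 characters by the
   PetscVSNPrintf() call above) to an object's log record, and is a no-op unless object logging
   is enabled. A hedged usage sketch, assuming a previously created Vec x of length n (both
   hypothetical):

.vb
      ierr = PetscLogObjectState((PetscObject)x,"Vector of length %D",n);CHKERRQ(ierr);
.ve
*/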
1927: /*MC
1928: PetscLogFlops - Adds floating point operations to the global counter.
1930: Synopsis:
1931: PetscErrorCode PetscLogFlops(PetscLogDouble f)
1933: Not Collective
1935: Input Parameter:
1936: . f - flop counter
1939: Usage:
1940: .vb
1941: PetscLogEvent USER_EVENT;
1942: PetscLogEventRegister("User event",0,&USER_EVENT);
1943: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1944: [code segment to monitor]
1945:         PetscLogFlops(user_flops);
1946: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1947: .ve
1949: Notes:
1950: A global counter logs all PETSc flop counts. The user can use
1951: PetscLogFlops() to increment this counter to include flops for the
1952: application code.
1954: PETSc automatically logs library events if the code has been
1955: compiled with -DPETSC_USE_LOG (which is the default), and -log,
1956: -log_summary, or -log_all are specified. PetscLogFlops() is
1957: intended for logging user flops to supplement this PETSc
1958: information.
1960: Level: intermediate
1962: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscGetFlops()
1964: .keywords: log, flops, floating point operations
1965: M*/
1967: /*MC
1968: PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice)
1969: to get accurate timings
1971: Synopsis:
1972: void PetscPreLoadBegin(PetscBool flag,char *name);
1974: Not Collective
1976: Input Parameter:
1977: + flag - PETSC_TRUE to run twice, PETSC_FALSE to run once, may be overridden
1978: with command line option -preload true or -preload false
1979: - name - name of first stage (lines of code timed separately with -log_summary) to
1980: be preloaded
1982: Usage:
1983: .vb
1984:    PetscPreLoadBegin(PETSC_TRUE,"first stage");
1985: lines of code
1986: PetscPreLoadStage("second stage");
1987: lines of code
1988: PetscPreLoadEnd();
1989: .ve
1991: Notes: Only works in C/C++, not Fortran
1993: Flags available within the macro.
1994: + PetscPreLoadingUsed - true if we are or have done preloading
1995: . PetscPreLoadingOn - true if it is CURRENTLY doing preload
1996: .   PetscPreLoadIt - 0 for the first computation (with preloading turned off it is always 0), 1 for the second
1997: -   PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned off
1998: The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin()
1999: and PetscPreLoadEnd()
2001: Level: intermediate
2003: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadEnd(), PetscPreLoadStage()
2005: Concepts: preloading
2006: Concepts: timing^accurate
2007: Concepts: paging^eliminating effects of
2010: M*/
2012: /*MC
2013: PetscPreLoadEnd - End a segment of code that may be preloaded (run twice)
2014: to get accurate timings
2016: Synopsis:
2017: void PetscPreLoadEnd(void);
2019: Not Collective
2021: Usage:
2022: .vb
2023:    PetscPreLoadBegin(PETSC_TRUE,"first stage");
2024: lines of code
2025: PetscPreLoadStage("second stage");
2026: lines of code
2027: PetscPreLoadEnd();
2028: .ve
2030:    Notes: Only works in C/C++, not Fortran
2032: Level: intermediate
2034: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadStage()
2036: M*/
2038: /*MC
2039:    PetscPreLoadStage - Start a new segment of code to be timed separately
2040: to get accurate timings
2042: Synopsis:
2043: void PetscPreLoadStage(char *name);
2045: Not Collective
2047: Usage:
2048: .vb
2049:    PetscPreLoadBegin(PETSC_TRUE,"first stage");
2050: lines of code
2051: PetscPreLoadStage("second stage");
2052: lines of code
2053: PetscPreLoadEnd();
2054: .ve
2056:    Notes: Only works in C/C++, not Fortran
2058: Level: intermediate
2060: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd()
2062: M*/
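/* A hedged sketch combining the three preloading macros documented above; the stage names are
   illustrative, and the PetscPreLoadIt/PetscPreLoadMax test simply shows how the flags listed
   under PetscPreLoadBegin() can be used inside the block:

.vb
      PetscPreLoadBegin(PETSC_TRUE,"Assembly");
        ... assemble the system; timed as stage "Assembly" ...
      PetscPreLoadStage("Solve");
        ... solve the system; timed as stage "Solve" ...
        if (PetscPreLoadIt == PetscPreLoadMax-1) {
          ... only the final pass reaches here, e.g. to view the solution ...
        }
      PetscPreLoadEnd();
.ve
*/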
2064: #include <../src/sys/viewer/impls/ascii/asciiimpl.h> /*I "petscsys.h" I*/
2067: /*@
2068: PetscLogViewPython - Saves logging information in a Python format.
2070: Collective on PetscViewer
2072:   Input Parameter:
2073: . viewer - viewer to save Python data
2075: Level: intermediate
2077: @*/
2078: PetscErrorCode PetscLogViewPython(PetscViewer viewer)
2079: {
2080: PetscViewer_ASCII *ascii = (PetscViewer_ASCII*)viewer->data;
2081: FILE *fd = ascii->fd;
2082: PetscLogDouble zero = 0.0;
2083: PetscStageLog stageLog;
2084: PetscStageInfo *stageInfo = PETSC_NULL;
2085: PetscEventPerfInfo *eventInfo = PETSC_NULL;
2086: const char *name;
2087: char stageName[2048];
2088: PetscLogDouble locTotalTime, TotalTime = 0, TotalFlops = 0;
2089: PetscLogDouble numMessages = 0, messageLength = 0, avgMessLen, numReductions = 0;
2090: PetscLogDouble stageTime, flops, mem, mess, messLen, red;
2091: PetscLogDouble fracTime, fracFlops, fracMessages, fracLength;
2092: PetscLogDouble fracReductions;
2093: PetscLogDouble tot,avg,x,y,*mydata;
2094: PetscMPIInt maxCt;
2095: PetscMPIInt size, rank, *mycount;
2096: PetscBool *localStageUsed, *stageUsed;
2097: PetscBool *localStageVisible, *stageVisible;
2098: int numStages, localNumEvents, numEvents;
2099: int stage, lastStage;
2100: PetscLogEvent event;
2101: PetscErrorCode ierr;
2102: PetscInt i;
2103: MPI_Comm comm;
2106: PetscObjectGetComm((PetscObject)viewer,&comm);
2107: MPI_Comm_size(comm, &size);
2108: MPI_Comm_rank(comm, &rank);
2109: PetscMalloc(size*sizeof(PetscLogDouble), &mydata);
2110: PetscMalloc(size*sizeof(PetscMPIInt), &mycount);
2112: /* Pop off any stages the user forgot to remove */
2113: lastStage = 0;
2114: PetscLogGetStageLog(&stageLog);
2115: PetscStageLogGetCurrent(stageLog, &stage);
2116: while (stage >= 0) {
2117: lastStage = stage;
2118: PetscStageLogPop(stageLog);
2119: PetscStageLogGetCurrent(stageLog, &stage);
2120: }
2121: /* Get the total elapsed time */
2122: PetscTime(locTotalTime); locTotalTime -= BaseTime;
2124: PetscFPrintf(comm, fd, "\n#------ PETSc Performance Summary ----------\n\n");
2125: PetscFPrintf(comm, fd, "Nproc = %d\n",size);
2127: /* Must preserve reduction count before we go on */
2128: red = (petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct)/((PetscLogDouble) size);
2130: /* Calculate summary information */
2132: /* Time */
2133: MPI_Gather(&locTotalTime,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2134: if (!rank){
2135: PetscFPrintf(comm, fd, "Time = [ " );
2136: tot = 0.0;
2137: for (i=0; i<size; i++){
2138: tot += mydata[i];
2139: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2140: }
2141: PetscFPrintf(comm, fd, "]\n" );
2142: avg = (tot)/((PetscLogDouble) size);
2143: TotalTime = tot;
2144: }
2146: /* Objects */
2147: avg = (PetscLogDouble) numObjects;
2148: MPI_Gather(&avg,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2149: if (!rank){
2150: PetscFPrintf(comm, fd, "Objects = [ " );
2151: for (i=0; i<size; i++){
2152: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2153: }
2154: PetscFPrintf(comm, fd, "]\n" );
2155: }
2157: /* Flops */
2158: MPI_Gather(&_TotalFlops,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2159: if (!rank){
2160: PetscFPrintf(comm, fd, "Flops = [ " );
2161: tot = 0.0;
2162: for (i=0; i<size; i++){
2163: tot += mydata[i];
2164: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2165: }
2166: PetscFPrintf(comm, fd, "]\n");
2167: TotalFlops = tot;
2168: }
2170: /* Memory */
2171: PetscMallocGetMaximumUsage(&mem);
2172: MPI_Gather(&mem,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2173: if (!rank){
2174: PetscFPrintf(comm, fd, "Memory = [ " );
2175: for (i=0; i<size; i++){
2176: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2177: }
2178: PetscFPrintf(comm, fd, "]\n" );
2179: }
2181: /* Messages */
2182: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
2183: MPI_Gather(&mess,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2184: if (!rank){
2185: PetscFPrintf(comm, fd, "MPIMessages = [ " );
2186: tot = 0.0;
2187: for (i=0; i<size; i++){
2188: tot += mydata[i];
2189: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2190: }
2191: PetscFPrintf(comm, fd, "]\n" );
2192: numMessages = tot;
2193: }
2195: /* Message Lengths */
2196: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
2197: MPI_Gather(&mess,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2198: if (!rank){
2199: PetscFPrintf(comm, fd, "MPIMessageLengths = [ " );
2200: tot = 0.0;
2201: for (i=0; i<size; i++){
2202: tot += mydata[i];
2203: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2204: }
2205: PetscFPrintf(comm, fd, "]\n" );
2206: messageLength = tot;
2207: }
2209: /* Reductions */
2210: MPI_Gather(&red,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2211: if (!rank){
2212: PetscFPrintf(comm, fd, "MPIReductions = [ " );
2213: tot = 0.0;
2214: for (i=0; i<size; i++){
2215: tot += mydata[i];
2216: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2217: }
2218: PetscFPrintf(comm, fd, "]\n" );
2219: numReductions = tot;
2220: }
2222: /* Get total number of stages --
2223: Currently, a single processor can register more stages than another, but stages must all be registered in order.
2224:      We can remove this requirement if necessary by having a global stage numbering and indirection on the stage ID.
2225:      This seems best accomplished by associating a communicator with each stage.
2226: */
2227: MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
2228: PetscMalloc(numStages * sizeof(PetscBool), &localStageUsed);
2229: PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
2230: PetscMalloc(numStages * sizeof(PetscBool), &localStageVisible);
2231: PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
2232: if (numStages > 0) {
2233: stageInfo = stageLog->stageInfo;
2234: for(stage = 0; stage < numStages; stage++) {
2235: if (stage < stageLog->numStages) {
2236: localStageUsed[stage] = stageInfo[stage].used;
2237: localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
2238: } else {
2239: localStageUsed[stage] = PETSC_FALSE;
2240: localStageVisible[stage] = PETSC_TRUE;
2241: }
2242: }
2243: MPI_Allreduce(localStageUsed, stageUsed, numStages, MPI_INT, MPI_LOR, comm);
2244: MPI_Allreduce(localStageVisible, stageVisible, numStages, MPI_INT, MPI_LAND, comm);
2245: for(stage = 0; stage < numStages; stage++) {
2246: if (stageUsed[stage]) {
2247: PetscFPrintf(comm, fd, "\n#Summary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --\n");
2248: PetscFPrintf(comm, fd, "# Avg %%Total Avg %%Total counts %%Total Avg %%Total counts %%Total \n");
2249: break;
2250: }
2251: }
2252: for(stage = 0; stage < numStages; stage++) {
2253: if (!stageUsed[stage]) continue;
2254: if (localStageUsed[stage]) {
2255: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2256: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2257: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2258: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2259: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2260: name = stageInfo[stage].name;
2261: } else {
2262: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2263: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2264: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2265: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2266: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2267: name = "";
2268: }
2269: mess *= 0.5; messLen *= 0.5; red /= size;
2270: if (TotalTime != 0.0) fracTime = stageTime/TotalTime; else fracTime = 0.0;
2271: if (TotalFlops != 0.0) fracFlops = flops/TotalFlops; else fracFlops = 0.0;
2272: /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */
2273: if (numMessages != 0.0) fracMessages = mess/numMessages; else fracMessages = 0.0;
2274: if (numMessages != 0.0) avgMessLen = messLen/numMessages; else avgMessLen = 0.0;
2275: if (messageLength != 0.0) fracLength = messLen/messageLength; else fracLength = 0.0;
2276: if (numReductions != 0.0) fracReductions = red/numReductions; else fracReductions = 0.0;
2277: PetscFPrintf(comm, fd, "# ");
2278: PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% \n",
2279: stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
2280: mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);
2281: }
2282: }
2284: /* Report events */
2285: PetscFPrintf(comm,fd,"\n# Event\n");
2286: PetscFPrintf(comm,fd,"# ------------------------------------------------------\n");
2287: PetscFPrintf(comm,fd,"class Stage(object):\n");
2288: PetscFPrintf(comm,fd," def __init__(self, name):\n");
2289: PetscFPrintf(comm,fd," self.name = name\n");
2290: PetscFPrintf(comm,fd," self.event = {}\n");
2291: PetscFPrintf(comm,fd, "class Dummy(object):\n");
2292: PetscFPrintf(comm,fd, " pass\n");
2293: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
2294: for(stage = 0; stage < numStages; stage++) {
2295: if (!stageVisible[stage]) continue;
2296: if (localStageUsed[stage]) {
2297: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2298: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2299: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2300: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2301: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2302: } else {
2303: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
2304: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2305: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2306: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2307: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2308: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2309: }
2310: mess *= 0.5; messLen *= 0.5; red /= size;
2312: /* Get total number of events in this stage --
2313: Currently, a single processor can register more events than another, but events must all be registered in order,
2314:        just like stages. We can remove this requirement if necessary by having a global event numbering and indirection
2315:        on the event ID. This seems best accomplished by associating a communicator with each stage.
2317: Problem: If the event did not happen on proc 1, its name will not be available.
2318: Problem: Event visibility is not implemented
2319: */
2321: {
2322: size_t len, c;
2324: PetscStrcpy(stageName, stageInfo[stage].name);
2325: PetscStrlen(stageName, &len);
2326: for(c = 0; c < len; ++c) {
2327: if (stageName[c] == ' ') stageName[c] = '_';
2328: }
2329: }
2330: if (!rank){
2331: PetscFPrintf(comm, fd, "%s = Stage('%s')\n", stageName, stageName);
2332: }
2334: if (localStageUsed[stage]) {
2335: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
2336: localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
2337: } else {
2338: localNumEvents = 0;
2339: }
2340: MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
2341: for(event = 0; event < numEvents; event++) {
2342: if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
2343: MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, comm);
2344: name = stageLog->eventLog->eventInfo[event].name;
2345: } else {
2346: MPI_Allreduce(&ierr, &maxCt, 1, MPI_INT, MPI_MAX, comm);
2347: name = "";
2348: }
2350: if (maxCt != 0) {
2351: PetscFPrintf(comm, fd,"#\n");
2352: if (!rank){
2353: PetscFPrintf(comm, fd, "%s = Dummy()\n",name);
2354: PetscFPrintf(comm, fd, "%s.event['%s'] = %s\n",stageName,name,name);
2355: }
2356: /* Count */
2357: MPI_Gather(&eventInfo[event].count,1,MPI_INT,mycount,1,MPI_INT,0,comm);
2358: PetscFPrintf(comm, fd, "%s.Count = [ ", name);
2359: if (!rank){
2360: for (i=0; i<size; i++){
2361: PetscFPrintf(comm, fd, " %7d,",mycount[i] );
2362: }
2363: PetscFPrintf(comm, fd, "]\n" );
2364: }
2365: /* Time */
2366: MPI_Gather(&eventInfo[event].time,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2367: if (!rank){
2368: PetscFPrintf(comm, fd, "%s.Time = [ ", name);
2369: for (i=0; i<size; i++){
2370: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2371: }
2372: PetscFPrintf(comm, fd, "]\n" );
2373: }
2374: /* Flops */
2375: MPI_Gather(&eventInfo[event].flops,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2376: if (!rank){
2377: PetscFPrintf(comm, fd, "%s.Flops = [ ", name);
2378: for (i=0; i<size; i++){
2379: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2380: }
2381: PetscFPrintf(comm, fd, "]\n" );
2382: }
2383: /* Num Messages */
2384: MPI_Gather(&eventInfo[event].numMessages,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2385: PetscFPrintf(comm, fd, "%s.NumMessages = [ ", name);
2386: if (!rank){
2387: for (i=0; i<size; i++){
2388: PetscFPrintf(comm, fd, " %7.1e,",mydata[i] );
2389: }
2390: PetscFPrintf(comm, fd, "]\n" );
2391: }
2392: /* Message Length */
2393: MPI_Gather(&eventInfo[event].messageLength,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2394: if (!rank){
2395: PetscFPrintf(comm, fd, "%s.MessageLength = [ ", name);
2396: for (i=0; i<size; i++){
2397: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2398: }
2399: PetscFPrintf(comm, fd, "]\n" );
2400: }
2401: /* Num Reductions */
2402: MPI_Gather(&eventInfo[event].numReductions,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2403: PetscFPrintf(comm, fd, "%s.NumReductions = [ ", name);
2404: if (!rank){
2405: for (i=0; i<size; i++){
2406: PetscFPrintf(comm, fd, " %7.1e,",mydata[i] );
2407: }
2408: PetscFPrintf(comm, fd, "]\n" );
2409: }
2410: }
2411: }
2412: }
2414: /* Right now, only stages on the first processor are reported here, meaning only objects associated with
2415: the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
2416: stats for stages local to processor sets.
2417: */
2418: for(stage = 0; stage < numStages; stage++) {
2419: if (!localStageUsed[stage]) {
2420: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
2421: }
2422: }
2424: PetscFree(localStageUsed);
2425: PetscFree(stageUsed);
2426: PetscFree(localStageVisible);
2427: PetscFree(stageVisible);
2428: PetscFree(mydata);
2429: PetscFree(mycount);
2431: /* Information unrelated to this particular run */
2432: PetscFPrintf(comm, fd,
2433: "# ========================================================================================================================\n");
2434: PetscTime(y);
2435: PetscTime(x);
2436: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
2437: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
2438: PetscFPrintf(comm,fd,"AveragetimetogetPetscTime = %g\n", (y-x)/10.0);
2439: /* MPI information */
2440: if (size > 1) {
2441: MPI_Status status;
2442: PetscMPIInt tag;
2443: MPI_Comm newcomm;
2445: MPI_Barrier(comm);
2446: PetscTime(x);
2447: MPI_Barrier(comm);
2448: MPI_Barrier(comm);
2449: MPI_Barrier(comm);
2450: MPI_Barrier(comm);
2451: MPI_Barrier(comm);
2452: PetscTime(y);
2453: PetscFPrintf(comm, fd, "AveragetimeforMPI_Barrier = %g\n", (y-x)/5.0);
2454: PetscCommDuplicate(comm,&newcomm, &tag);
2455: MPI_Barrier(comm);
2456: if (rank) {
2457: MPI_Recv(0, 0, MPI_INT, rank-1, tag, newcomm, &status);
2458: MPI_Send(0, 0, MPI_INT, (rank+1)%size, tag, newcomm);
2459: } else {
2460: PetscTime(x);
2461: MPI_Send(0, 0, MPI_INT, 1, tag, newcomm);
2462: MPI_Recv(0, 0, MPI_INT, size-1, tag, newcomm, &status);
2463: PetscTime(y);
2464: PetscFPrintf(comm,fd,"AveragetimforzerosizeMPI_Send = %g\n", (y-x)/size);
2465: }
2466: PetscCommDestroy(&newcomm);
2467: }
2469: /* Cleanup */
2470: PetscFPrintf(comm, fd, "\n");
2471: PetscStageLogPush(stageLog, lastStage);
2472: return(0);
2473: }
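/* A hedged usage sketch for PetscLogViewPython() (not part of plog.c): the viewer must be an
   ASCII viewer, since the implementation above casts viewer->data to PetscViewer_ASCII to obtain
   the FILE pointer. The output file name "log.py" is illustrative.

.vb
      PetscViewer viewer;

      ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"log.py",&viewer);CHKERRQ(ierr);
      ierr = PetscLogViewPython(viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
.ve
*/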
2475: #else /* end of -DPETSC_USE_LOG section */
2479: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2480: {
2482: return(0);
2483: }
2485: #endif /* PETSC_USE_LOG*/
2488: PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
2489: PetscClassId PETSC_OBJECT_CLASSID = 0;
2493: /*@C
2494: PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code.
2496: Not Collective
2498: Input Parameter:
2499: . name - The class name
2500:
2501: Output Parameter:
2502: . oclass - The class id or classid
2504: Level: developer
2506: .keywords: log, class, register
2508: @*/
2509: PetscErrorCode PetscClassIdRegister(const char name[],PetscClassId *oclass )
2510: {
2511: #if defined(PETSC_USE_LOG)
2512: PetscStageLog stageLog;
2513: PetscInt stage;
2515: #endif
2518: *oclass = ++PETSC_LARGEST_CLASSID;
2519: #if defined(PETSC_USE_LOG)
2520: PetscLogGetStageLog(&stageLog);
2521: PetscClassRegLogRegister(stageLog->classLog, name, *oclass);
2522: for(stage = 0; stage < stageLog->numStages; stage++) {
2523: ClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
2524: }
2525: #endif
2526: return(0);
2527: }
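/* A hedged usage sketch for PetscClassIdRegister(); the class name "MyApp" and the event are
   hypothetical. The returned classid is typically passed to PetscLogEventRegister() so that the
   application's events are grouped with the new class:

.vb
      PetscClassId  MYAPP_CLASSID;
      PetscLogEvent MYAPP_Solve;

      ierr = PetscClassIdRegister("MyApp",&MYAPP_CLASSID);CHKERRQ(ierr);
      ierr = PetscLogEventRegister("MyAppSolve",MYAPP_CLASSID,&MYAPP_Solve);CHKERRQ(ierr);
.ve
*/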