15 #include "kmp_stats.h"
25 #define STRINGIZE2(x) #x
26 #define STRINGIZE(x) STRINGIZE2(x)
28 #define expandName(name, flags, ignore) {STRINGIZE(name), flags},
29 statInfo timeStat::timerInfo[] = {
30 KMP_FOREACH_TIMER(expandName, 0){
"TIMER_LAST", 0}};
31 const statInfo counter::counterInfo[] = {
35 #define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0},
36 kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
37 KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}};
40 const kmp_stats_output_module::rgb_color
41 kmp_stats_output_module::globalColorArray[] = {
65 static uint32_t statsPrinted = 0;
68 static kmp_stats_output_module *__kmp_stats_global_output = NULL;
70 double logHistogram::binMax[] = {
71 1.e1l, 1.e2l, 1.e3l, 1.e4l, 1.e5l, 1.e6l, 1.e7l, 1.e8l,
72 1.e9l, 1.e10l, 1.e11l, 1.e12l, 1.e13l, 1.e14l, 1.e15l, 1.e16l,
73 1.e17l, 1.e18l, 1.e19l, 1.e20l, 1.e21l, 1.e22l, 1.e23l, 1.e24l,
74 1.e25l, 1.e26l, 1.e27l, 1.e28l, 1.e29l, 1.e30l};
78 void statistic::addSample(
double sample) {
80 KMP_DEBUG_ASSERT(std::isfinite(sample));
82 double delta = sample - meanVal;
84 sampleCount = sampleCount + 1;
85 meanVal = meanVal + delta / sampleCount;
86 m2 = m2 + delta * (sample - meanVal);
88 minVal = std::min(minVal, sample);
89 maxVal = std::max(maxVal, sample);
91 hist.addSample(sample);
94 statistic &statistic::operator+=(
const statistic &other) {
95 if (other.sampleCount == 0)
98 if (sampleCount == 0) {
103 uint64_t newSampleCount = sampleCount + other.sampleCount;
104 double dnsc = double(newSampleCount);
105 double dsc = double(sampleCount);
106 double dscBydnsc = dsc / dnsc;
107 double dosc = double(other.sampleCount);
108 double delta = other.meanVal - meanVal;
116 meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc);
117 m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta;
118 minVal = std::min(minVal, other.minVal);
119 maxVal = std::max(maxVal, other.maxVal);
120 sampleCount = newSampleCount;
127 void statistic::scale(
double factor) {
128 minVal = minVal * factor;
129 maxVal = maxVal * factor;
130 meanVal = meanVal * factor;
131 m2 = m2 * factor * factor;
135 std::string statistic::format(
char unit,
bool total)
const {
136 std::string result = formatSI(sampleCount, 9,
' ');
138 if (sampleCount == 0) {
139 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
140 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
141 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
143 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
144 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
146 result = result + std::string(
", ") + formatSI(minVal, 9, unit);
147 result = result + std::string(
", ") + formatSI(meanVal, 9, unit);
148 result = result + std::string(
", ") + formatSI(maxVal, 9, unit);
151 result + std::string(
", ") + formatSI(meanVal * sampleCount, 9, unit);
152 result = result + std::string(
", ") + formatSI(getSD(), 9, unit);
160 int logHistogram::minBin()
const {
161 for (
int i = 0; i < numBins; i++) {
162 if (bins[i].count != 0)
163 return i - logOffset;
169 int logHistogram::maxBin()
const {
170 for (
int i = numBins - 1; i >= 0; i--) {
171 if (bins[i].count != 0)
172 return i - logOffset;
178 uint32_t logHistogram::findBin(
double sample) {
179 double v = std::fabs(sample);
184 for (
int b = 0; b < numBins; b++)
188 "Trying to add a sample that is too large into a histogram\n");
193 void logHistogram::addSample(
double sample) {
202 KMP_DEBUG_ASSERT(std::isfinite(sample));
203 uint32_t bin = findBin(sample);
204 KMP_DEBUG_ASSERT(0 <= bin && bin < numBins);
206 bins[bin].count += 1;
207 bins[bin].total += sample;
215 std::string logHistogram::format(
char unit)
const {
216 std::stringstream result;
218 result <<
"Bin, Count, Total\n";
220 result <<
"0, " << formatSI(zeroCount, 9,
' ') <<
", ",
221 formatSI(0.0, 9, unit);
222 if (count(minBin()) == 0)
226 for (
int i = minBin(); i <= maxBin(); i++) {
227 result <<
"10**" << i <<
"<=v<10**" << (i + 1) <<
", "
228 << formatSI(count(i), 9,
' ') <<
", " << formatSI(total(i), 9, unit);
238 void explicitTimer::start(tsc_tick_count tick) {
242 __kmp_stats_thread_ptr->incrementNestValue();
247 void explicitTimer::stop(tsc_tick_count tick,
248 kmp_stats_list *stats_ptr ) {
249 if (startTime.getValue() == 0)
252 stat->addSample(((tick - startTime) - totalPauseTime).ticks());
256 stats_ptr = __kmp_stats_thread_ptr;
257 stats_ptr->push_event(
258 startTime.getValue() - __kmp_stats_start_time.getValue(),
259 tick.getValue() - __kmp_stats_start_time.getValue(),
260 __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
261 stats_ptr->decrementNestValue();
271 partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
274 void partitionedTimers::init(explicitTimer timer) {
275 KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
276 timer_stack.push_back(timer);
277 timer_stack.back().start(tsc_tick_count::now());
284 void partitionedTimers::push(explicitTimer timer) {
289 explicitTimer *current_timer, *new_timer;
291 KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
292 timer_stack.push_back(timer);
293 stack_size = timer_stack.size();
294 current_timer = &(timer_stack[stack_size - 2]);
295 new_timer = &(timer_stack[stack_size - 1]);
296 tsc_tick_count tick = tsc_tick_count::now();
297 current_timer->pause(tick);
298 new_timer->start(tick);
302 void partitionedTimers::pop() {
307 explicitTimer *old_timer, *new_timer;
308 size_t stack_size = timer_stack.size();
309 KMP_DEBUG_ASSERT(stack_size > 1);
310 old_timer = &(timer_stack[stack_size - 1]);
311 new_timer = &(timer_stack[stack_size - 2]);
312 tsc_tick_count tick = tsc_tick_count::now();
313 old_timer->stop(tick);
314 new_timer->resume(tick);
315 timer_stack.pop_back();
318 void partitionedTimers::exchange(explicitTimer timer) {
323 explicitTimer *current_timer, *new_timer;
325 KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
326 tsc_tick_count tick = tsc_tick_count::now();
327 stack_size = timer_stack.size();
328 current_timer = &(timer_stack[stack_size - 1]);
329 current_timer->stop(tick);
330 timer_stack.pop_back();
331 timer_stack.push_back(timer);
332 new_timer = &(timer_stack[stack_size - 1]);
333 new_timer->start(tick);
340 void partitionedTimers::windup() {
341 while (timer_stack.size() > 1) {
345 if (timer_stack.size() > 0) {
346 timer_stack.back().stop(tsc_tick_count::now());
347 timer_stack.pop_back();
353 void kmp_stats_event_vector::deallocate() {
364 int compare_two_events(
const void *event1,
const void *event2) {
365 const kmp_stats_event *ev1 = RCAST(
const kmp_stats_event *, event1);
366 const kmp_stats_event *ev2 = RCAST(
const kmp_stats_event *, event2);
368 if (ev1->getStart() < ev2->getStart())
370 else if (ev1->getStart() > ev2->getStart())
376 void kmp_stats_event_vector::sort() {
377 qsort(events, internal_size,
sizeof(kmp_stats_event), compare_two_events);
383 kmp_stats_list *kmp_stats_list::push_back(
int gtid) {
384 kmp_stats_list *newnode =
385 (kmp_stats_list *)__kmp_allocate(
sizeof(kmp_stats_list));
388 new (newnode) kmp_stats_list();
389 newnode->setGtid(gtid);
390 newnode->prev = this->prev;
391 newnode->next =
this;
392 newnode->prev->next = newnode;
393 newnode->next->prev = newnode;
396 void kmp_stats_list::deallocate() {
397 kmp_stats_list *ptr = this->next;
398 kmp_stats_list *delptr = this->next;
399 while (ptr !=
this) {
403 delptr->_event_vector.deallocate();
404 delptr->~kmp_stats_list();
408 kmp_stats_list::iterator kmp_stats_list::begin() {
409 kmp_stats_list::iterator it;
413 kmp_stats_list::iterator kmp_stats_list::end() {
414 kmp_stats_list::iterator it;
418 int kmp_stats_list::size() {
420 kmp_stats_list::iterator it;
421 for (retval = 0, it = begin(); it != end(); it++, retval++) {
428 kmp_stats_list::iterator::iterator() : ptr(NULL) {}
429 kmp_stats_list::iterator::~iterator() {}
430 kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
431 this->ptr = this->ptr->next;
434 kmp_stats_list::iterator kmp_stats_list::iterator::operator++(
int dummy) {
435 this->ptr = this->ptr->next;
438 kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
439 this->ptr = this->ptr->prev;
442 kmp_stats_list::iterator kmp_stats_list::iterator::operator--(
int dummy) {
443 this->ptr = this->ptr->prev;
446 bool kmp_stats_list::iterator::operator!=(
const kmp_stats_list::iterator &rhs) {
447 return this->ptr != rhs.ptr;
449 bool kmp_stats_list::iterator::operator==(
const kmp_stats_list::iterator &rhs) {
450 return this->ptr == rhs.ptr;
452 kmp_stats_list *kmp_stats_list::iterator::operator*()
const {
458 const char *kmp_stats_output_module::eventsFileName = NULL;
459 const char *kmp_stats_output_module::plotFileName = NULL;
460 int kmp_stats_output_module::printPerThreadFlag = 0;
461 int kmp_stats_output_module::printPerThreadEventsFlag = 0;
463 static char const *lastName(
char *name) {
464 int l = strlen(name);
465 for (
int i = l - 1; i >= 0; --i) {
475 static char const *getImageName(
char *buffer,
size_t buflen) {
476 FILE *f = fopen(
"/proc/self/cmdline",
"r");
484 size_t n = fread(buffer, 1, buflen, f);
487 KMP_CHECK_SYSFAIL(
"fread", 1)
490 buffer[buflen - 1] = char(0);
491 return lastName(buffer);
494 static void getTime(
char *buffer,
size_t buflen,
bool underscores =
false) {
499 struct tm *tm_info = localtime(&timer);
501 strftime(buffer, buflen,
"%Y-%m-%d_%H%M%S", tm_info);
503 strftime(buffer, buflen,
"%Y-%m-%d %H%M%S", tm_info);
507 static std::string generateFilename(
char const *prototype,
508 char const *imageName) {
511 for (
int i = 0; prototype[i] != char(0); i++) {
512 char ch = prototype[i];
516 if (prototype[i] ==
char(0))
519 switch (prototype[i]) {
523 getTime(date,
sizeof(date),
true);
531 std::stringstream ss;
547 void kmp_stats_output_module::init() {
549 char *statsFileName = getenv(
"KMP_STATS_FILE");
550 eventsFileName = getenv(
"KMP_STATS_EVENTS_FILE");
551 plotFileName = getenv(
"KMP_STATS_PLOT_FILE");
552 char *threadStats = getenv(
"KMP_STATS_THREADS");
553 char *threadEvents = getenv(
"KMP_STATS_EVENTS");
557 char imageName[1024];
559 outputFileName = generateFilename(
560 statsFileName, getImageName(&imageName[0],
sizeof(imageName)));
562 eventsFileName = eventsFileName ? eventsFileName :
"events.dat";
563 plotFileName = plotFileName ? plotFileName :
"events.plt";
567 printPerThreadFlag = __kmp_str_match_true(threadStats);
568 printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
570 if (printPerThreadEventsFlag) {
575 timeStat::clearEventFlags();
579 void kmp_stats_output_module::setupEventColors() {
581 int globalColorIndex = 0;
582 int numGlobalColors =
sizeof(globalColorArray) /
sizeof(rgb_color);
583 for (i = 0; i < TIMER_LAST; i++) {
585 timerColorInfo[i] = globalColorArray[globalColorIndex];
586 globalColorIndex = (globalColorIndex + 1) % numGlobalColors;
591 void kmp_stats_output_module::printTimerStats(FILE *statsOut,
592 statistic
const *theStats,
593 statistic
const *totalStats) {
595 "Timer, SampleCount, Min, "
596 "Mean, Max, Total, SD\n");
597 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
598 statistic
const *stat = &theStats[s];
601 fprintf(statsOut,
"%-35s, %s\n", timeStat::name(s),
602 stat->format(tag,
true).c_str());
605 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
608 fprintf(statsOut,
"Total_%-29s, %s\n", timeStat::name(s),
609 totalStats[s].format(tag,
true).c_str());
613 if (theStats[0].haveHist()) {
614 fprintf(statsOut,
"\nTimer distributions\n");
615 for (
int s = 0; s < TIMER_LAST; s++) {
616 statistic
const *stat = &theStats[s];
618 if (stat->getCount() != 0) {
621 fprintf(statsOut,
"%s\n", timeStat::name(timer_e(s)));
622 fprintf(statsOut,
"%s\n", stat->getHist()->format(tag).c_str());
628 void kmp_stats_output_module::printCounterStats(FILE *statsOut,
629 statistic
const *theStats) {
630 fprintf(statsOut,
"Counter, ThreadCount, Min, Mean, "
631 " Max, Total, SD\n");
632 for (
int s = 0; s < COUNTER_LAST; s++) {
633 statistic
const *stat = &theStats[s];
634 fprintf(statsOut,
"%-25s, %s\n", counter::name(counter_e(s)),
635 stat->format(
' ',
true).c_str());
638 if (theStats[0].haveHist()) {
639 fprintf(statsOut,
"\nCounter distributions\n");
640 for (
int s = 0; s < COUNTER_LAST; s++) {
641 statistic
const *stat = &theStats[s];
643 if (stat->getCount() != 0) {
644 fprintf(statsOut,
"%s\n", counter::name(counter_e(s)));
645 fprintf(statsOut,
"%s\n", stat->getHist()->format(
' ').c_str());
651 void kmp_stats_output_module::printCounters(FILE *statsOut,
652 counter
const *theCounters) {
655 fprintf(statsOut,
"\nCounter, Count\n");
656 for (
int c = 0; c < COUNTER_LAST; c++) {
657 counter
const *stat = &theCounters[c];
658 fprintf(statsOut,
"%-25s, %s\n", counter::name(counter_e(c)),
659 formatSI(stat->getValue(), 9,
' ').c_str());
663 void kmp_stats_output_module::printEvents(FILE *eventsOut,
664 kmp_stats_event_vector *theEvents,
668 for (
int i = 0; i < theEvents->size(); i++) {
669 kmp_stats_event ev = theEvents->at(i);
670 rgb_color color = getEventColor(ev.getTimerName());
671 fprintf(eventsOut,
"%d %llu %llu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid,
672 static_cast<unsigned long long>(ev.getStart()),
673 static_cast<unsigned long long>(ev.getStop()),
674 1.2 - (ev.getNestLevel() * 0.2), color.r, color.g, color.b,
675 timeStat::name(ev.getTimerName()));
680 void kmp_stats_output_module::windupExplicitTimers() {
684 kmp_stats_list::iterator it;
685 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
686 kmp_stats_list *ptr = *it;
687 ptr->getPartitionedTimers()->windup();
692 void kmp_stats_output_module::printPloticusFile() {
694 int size = __kmp_stats_list->size();
695 FILE *plotOut = fopen(plotFileName,
"w+");
697 fprintf(plotOut,
"#proc page\n"
701 fprintf(plotOut,
"#proc getdata\n"
705 fprintf(plotOut,
"#proc areadef\n"
706 " title: OpenMP Sampling Timeline\n"
707 " titledetails: align=center size=16\n"
708 " rectangle: 1 1 13 9\n"
709 " xautorange: datafield=2,3\n"
710 " yautorange: -1 %d\n\n",
713 fprintf(plotOut,
"#proc xaxis\n"
715 " stubdetails: size=12\n"
716 " label: Time (ticks)\n"
717 " labeldetails: size=14\n\n");
719 fprintf(plotOut,
"#proc yaxis\n"
722 " stubdetails: size=12\n"
724 " labeldetails: size=14\n\n",
727 fprintf(plotOut,
"#proc bars\n"
728 " exactcolorfield: 5\n"
731 " segmentfields: 2 3\n"
732 " barwidthfield: 4\n\n");
735 for (i = 0; i < TIMER_LAST; i++) {
737 rgb_color c = getEventColor((timer_e)i);
738 fprintf(plotOut,
"#proc legendentry\n"
739 " sampletype: color\n"
741 " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
742 timeStat::name((timer_e)i), c.r, c.g, c.b);
746 fprintf(plotOut,
"#proc legend\n"
748 " location: max max\n\n");
753 static void outputEnvVariable(FILE *statsOut,
char const *name) {
754 char const *value = getenv(name);
755 fprintf(statsOut,
"# %s = %s\n", name, value ? value :
"*unspecified*");
763 void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) {
764 std::time_t now = std::time(0);
768 std::strftime(&buffer[0],
sizeof(buffer),
"%c", std::localtime(&now));
769 fprintf(statsOut,
"# Time of run: %s\n", &buffer[0]);
770 if (gethostname(&hostName[0],
sizeof(hostName)) == 0)
771 fprintf(statsOut,
"# Hostname: %s\n", &hostName[0]);
772 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
773 fprintf(statsOut,
"# CPU: %s\n", &__kmp_cpuinfo.name[0]);
774 fprintf(statsOut,
"# Family: %d, Model: %d, Stepping: %d\n",
775 __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
776 if (__kmp_cpuinfo.frequency == 0)
777 fprintf(statsOut,
"# Nominal frequency: Unknown\n");
779 fprintf(statsOut,
"# Nominal frequency: %sz\n",
780 formatSI(
double(__kmp_cpuinfo.frequency), 9,
'H').c_str());
781 outputEnvVariable(statsOut,
"KMP_HW_SUBSET");
782 outputEnvVariable(statsOut,
"KMP_AFFINITY");
783 outputEnvVariable(statsOut,
"KMP_BLOCKTIME");
784 outputEnvVariable(statsOut,
"KMP_LIBRARY");
785 fprintf(statsOut,
"# Production runtime built " __DATE__
" " __TIME__
"\n");
789 void kmp_stats_output_module::outputStats(
const char *heading) {
793 windupExplicitTimers();
795 statistic allStats[TIMER_LAST];
796 statistic totalStats[TIMER_LAST];
798 statistic allCounters[COUNTER_LAST];
801 !outputFileName.empty() ? fopen(outputFileName.c_str(),
"a+") : stderr;
806 if (eventPrintingEnabled()) {
807 eventsOut = fopen(eventsFileName,
"w+");
810 printHeaderInfo(statsOut);
811 fprintf(statsOut,
"%s\n", heading);
813 kmp_stats_list::iterator it;
814 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
815 int t = (*it)->getGtid();
817 if (printPerThreadFlag) {
818 fprintf(statsOut,
"Thread %d\n", t);
819 printTimerStats(statsOut, (*it)->getTimers(), 0);
820 printCounters(statsOut, (*it)->getCounters());
821 fprintf(statsOut,
"\n");
824 if (eventPrintingEnabled()) {
825 kmp_stats_event_vector events = (*it)->getEventVector();
826 printEvents(eventsOut, &events, t);
830 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
832 if ((timeStat::masterOnly(s) && (t != 0)) ||
834 (timeStat::workerOnly(s) && (t == 0))
840 statistic *threadStat = (*it)->getTimer(s);
841 allStats[s] += *threadStat;
845 totalStats[s].addSample(threadStat->getTotal());
849 for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) {
850 if (counter::masterOnly(c) && t != 0)
852 allCounters[c].addSample((*it)->getCounter(c)->getValue());
856 if (eventPrintingEnabled()) {
861 fprintf(statsOut,
"Aggregate for all threads\n");
862 printTimerStats(statsOut, &allStats[0], &totalStats[0]);
863 fprintf(statsOut,
"\n");
864 printCounterStats(statsOut, &allCounters[0]);
866 if (statsOut != stderr)
876 void __kmp_reset_stats() {
877 kmp_stats_list::iterator it;
878 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
879 timeStat *timers = (*it)->getTimers();
880 counter *counters = (*it)->getCounters();
882 for (
int t = 0; t < TIMER_LAST; t++)
885 for (
int c = 0; c < COUNTER_LAST; c++)
889 (*it)->resetEventVector();
895 void __kmp_output_stats(
const char *heading) {
896 __kmp_stats_global_output->outputStats(heading);
900 void __kmp_accumulate_stats_at_exit(
void) {
902 if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
905 __kmp_output_stats(
"Statistics on exit");
908 void __kmp_stats_init(
void) {
909 __kmp_init_tas_lock(&__kmp_stats_lock);
910 __kmp_stats_start_time = tsc_tick_count::now();
911 __kmp_stats_global_output =
new kmp_stats_output_module();
912 __kmp_stats_list =
new kmp_stats_list();
915 void __kmp_stats_fini(
void) {
916 __kmp_accumulate_stats_at_exit();
917 __kmp_stats_list->deallocate();
918 delete __kmp_stats_global_output;
919 delete __kmp_stats_list;