LLVM OpenMP 20.0.0git
summarizeStats.py
Go to the documentation of this file.
1#!/usr/bin/env python
2
3import pandas as pd
4import numpy as np
5import re
6import sys
7import os
8import argparse
9import matplotlib
10from matplotlib import pyplot as plt
11from matplotlib.projections.polar import PolarAxes
12from matplotlib.projections import register_projection
13
"""
Read the stats file produced by the OpenMP runtime
and produce a processed summary.

The original radar_factory code was taken from
matplotlib.org/examples/api/radar_chart.html.
We added support for handling negative values in radar charts.
"""
22
def radar_factory(num_vars, frame='circle'):
    """Register a 'radar' projection with num_vars axes and return its angles.

    A PolarAxes subclass named 'radar' is defined and registered with
    matplotlib, so that figures can afterwards request projection='radar'.

    Parameters:
        num_vars: number of axes (spokes) of the chart.
        frame: shape of the surrounding frame, 'circle' or 'polygon'.

    Returns:
        The numpy array of num_vars axis angles in radians, evenly spaced
        starting at 0 and stopping one step short of 2*pi.

    Raises:
        ValueError: if frame is neither 'circle' nor 'polygon'.
    """
    # calculate evenly-spaced axis angles
    theta = 2*np.pi * np.linspace(0, 1-1./num_vars, num_vars)
    # The rotation that would put the first axis at the top (adding pi/2 to
    # theta) is deliberately not applied here.

    def draw_poly_frame(self, x0, y0, r):
        # TODO: use transforms to convert (x, y) to (r, theta)
        verts = [(r*np.cos(t) + x0, r*np.sin(t) + y0) for t in theta]
        return plt.Polygon(verts, closed=True, edgecolor='k')

    def draw_circle_frame(self, x0, y0, r):
        return plt.Circle((x0, y0), r)

    frame_dict = {'polygon': draw_poly_frame, 'circle': draw_circle_frame}
    if frame not in frame_dict:
        # Call syntax works on both Python 2 and Python 3; the old
        # "raise ValueError, msg" form is a SyntaxError on Python 3.
        raise ValueError('unknown value for `frame`: %s' % frame)

    class RadarAxes(PolarAxes):
        """
        Class for creating a radar chart (a.k.a. a spider or star chart)

        http://en.wikipedia.org/wiki/Radar_chart
        """
        name = 'radar'
        # use 1 line segment to connect specified points
        RESOLUTION = 1
        # define draw_frame method
        draw_frame = frame_dict[frame]

        def fill(self, *args, **kwargs):
            """Override fill so that line is closed by default"""
            closed = kwargs.pop('closed', True)
            return super(RadarAxes, self).fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Forward to PolarAxes.plot and hand back the created lines.

            The lines are not closed automatically.
            """
            return super(RadarAxes, self).plot(*args, **kwargs)

        def set_varlabels(self, labels):
            """Label each axis; labels must line up with the theta angles."""
            self.set_thetagrids(theta * 180/np.pi, labels, fontsize=14)

        def _gen_axes_patch(self):
            # Frame centred at (0.5, 0.5) with radius 0.5.
            x0, y0 = (0.5, 0.5)
            r = 0.5
            return self.draw_frame(x0, y0, r)

    register_projection(RadarAxes)
    return theta
75
76# Code to read the raw stats
def extractSI(s):
    """Turn "<number> [<unit>]" into a float scaled by the unit's SI prefix.

    Only the first character of the unit is inspected.  When the text does
    not split into exactly two whitespace-separated pieces the number is
    returned unscaled.  An unrecognised prefix raises KeyError.
    """
    pieces = s.split()
    value = float(pieces[0])
    unit = pieces[1] if len(pieces) == 2 else ' '
    prefix = unit[0]

    # http://physics.nist.gov/cuu/Units/prefixes.html
    # Prefixes that enlarge the value are applied by multiplication.
    multiply_by = {'Y': 1e24, 'Z': 1e21, 'E': 1e18, 'P': 1e15, 'T': 1e12,
                   'G': 1e9, 'M': 1e6, 'k': 1e3, ' ': 1}
    # Prefixes that shrink the value are applied by a single division rather
    # than a multiplication by 1/x: 1/10 has no exact IEEE representation,
    # so the reciprocal form would round twice.
    divide_by = {'m': 1e3, 'u': 1e6, 'n': 1e9, 'p': 1e12,
                 'f': 1e15, 'a': 1e18, 'z': 1e21, 'y': 1e24}

    if prefix in multiply_by:
        return value * multiply_by[prefix]
    return value / divide_by[prefix]
106
def readData(f):
    """Read one comma-separated table from the open file f into a DataFrame.

    The first line read supplies the column names; its first field (the
    heading of the row-name column) is dropped.  Each following line
    contributes one row whose first field is the row label and whose
    remaining fields are converted with extractSI.  Lines starting with '#'
    are skipped.  Reading stops at the first blank line or at end of file.

    Returns:
        A pandas DataFrame indexed by row label, in file order.
    """
    header = f.readline()
    fieldnames = [x.strip() for x in header.split(',')]
    labels = []
    rows = []
    line = f.readline().strip()
    while line != "":
        if line[0] != '#':
            fields = line.split(',')
            labels.append(fields[0].strip())
            rows.append([extractSI(v) for v in fields[1:]])
        line = f.readline().strip()
    # DataFrame.from_items was removed in pandas 1.0; the plain constructor
    # builds the same row-labelled table and keeps the rows in file order.
    return pd.DataFrame(rows, index=labels, columns=fieldnames[1:])
122
def readTimers(f):
    """Skip the preamble of the open stats file f, then read the timer table.

    Leading lines that start with '#' are skipped.  If the first line after
    them is one of the headings "Statistics on exit" or "Aggregate for all
    threads", one further line is read and discarded.  The table itself is
    then parsed by readData.

    Returns:
        Whatever readData(f) returns for the timer table.
    """
    line = f.readline()
    # startswith() is also safe on the empty string returned at end of file.
    while line.startswith('#'):
        line = f.readline()
    line = line.strip()
    # The line has been stripped, so compare against the bare heading text.
    # The previous "x == a or b" form was always true.
    if line in ("Statistics on exit", "Aggregate for all threads"):
        # NOTE(review): the line after the heading is dropped unread;
        # presumably it is a second heading line - confirm against the
        # runtime's output format.
        f.readline()
    return readData(f)
132
def readCounters(f):
    """Read the counter table from the open file f.

    No extra preamble handling is done; the table is parsed by readData
    exactly as the timer table is.
    """
    return readData(f)
136
def readFile(fname):
    """Read the statistics from the file named fname.

    Returns:
        A dict with keys "timers" and "counters" holding the results of
        readTimers and readCounters, or None (after printing a message)
        when an OSError/IOError occurs while opening or reading the file.
    """
    res = {}
    try:
        with open(fname) as f:
            res["timers"] = readTimers(f)
            res["counters"] = readCounters(f)
            return res
    except (OSError, IOError):
        # Single-argument call form prints the same text on Python 2 and 3.
        print("Cannot open " + fname)
        return None
148
def usefulValues(l):
    """Return a list of flags, one per element of l: neither null nor zero.

    l must support element-wise comparison (l != 0.0 has to yield one flag
    per element), e.g. a pandas Series or a numpy array.
    """
    return [present and nonzero
            for (present, nonzero) in zip(pd.notnull(l), l != 0.0)]
152
# NOTE(review): the "def" line of this function is absent from the listing;
# the name below is inferred from the docstring and from usefulValues -
# confirm against the original file.
def uselessValues(l):
    """Return a list of flags, one per element of l: null or zero.

    This is the element-wise negation of usefulValues(l).
    """
    return [not flag for flag in usefulValues(l)]
156
# Tables of a stats file that main() processes, in processing order.
interestingStats = ("counters", "timers")

# Per table: (y-axis label, chart title).
statProperties = dict(
    counters=("Count", "Counter Statistics"),
    timers=("Time (ticks)", "Timer Statistics"),
)
161
def drawChart(data, kind, filebase):
    """Plot the "Mean" column of data as a log-scale bar chart and save it.

    The "SD" column supplies the error bars.  kind selects the axis label
    and title from statProperties.  The figure is saved under the name
    filebase + "_" + kind.
    """
    means = data["Mean"]
    means.plot(kind="bar", logy=True, grid=True, colormap="GnBu",
               yerr=data["SD"], ecolor="black")
    plt.xlabel("OMP Constructs")
    yLabel, chartTitle = statProperties[kind]
    plt.ylabel(yLabel)
    plt.title(chartTitle)
    plt.tight_layout()
    plt.savefig(filebase + "_" + kind)
171
def normalizeValues(data, countField, factor):
    """Divide every column of data except countField by factor, in place.

    Nothing is returned; data itself is updated.
    """
    scaledColumns = [name for name in data.keys() if name != countField]
    data[scaledColumns] = data[scaledColumns] / factor
175
176
def setRadarFigure(titles):
    """Create a 9x9 figure holding one radar axes labelled with titles.

    Returns:
        A dict with the axes under 'ax' and the axis angles produced by
        radar_factory under 'theta'.
    """
    figure = plt.figure(figsize=(9, 9))
    axesBox = [0.1, 0.1, 0.8, 0.8]
    radialTicks = [0.2, 0.4, 0.6, 0.8, 1, 2, 3, 4, 5, 10]
    matplotlib.rcParams.update({'font.size': 13})
    # radar_factory registers the 'radar' projection requested just below.
    theta = radar_factory(len(titles))
    ax = figure.add_axes(axesBox, projection='radar')
    ax.set_rgrids(radialTicks)
    ax.set_varlabels(titles)
    # Annotation placed at the angle of the third axis, radius 1.
    ax.text(theta[2], 1, "Linear->Log", horizontalalignment='center',
            color='green', fontsize=18)
    return dict(ax=ax, theta=theta)
189
190
def drawRadarChart(data, kind, filebase, params, color):
    """Plot data on the radar axes in params as one log and one linear line.

    Entries of data that are >= 1 go, as log10, onto the "log" line; all
    other entries go unchanged onto the "linear" line.  Each line is zero
    wherever the other one carries the value.  params is the dict returned
    by setRadarFigure.  color is accepted but not used: the log line is
    always blue and the linear line always red.
    """
    logPart = data * 0
    linearPart = data * 0
    for name in data.keys():
        value = data[name]
        if value >= 1:
            logPart[name] = np.log10(value)
        else:
            linearPart[name] = value
    labelBase = filebase + "_" + kind
    params['ax'].plot(params['theta'], logPart, color='b',
                      label=labelBase + "_log")
    params['ax'].plot(params['theta'], linearPart, color='r',
                      label=labelBase + "_linear")
    params['ax'].legend(loc='best', bbox_to_anchor=(1.4, 1.2))
    # The radial limit is the largest log value rounded up.
    upper = np.ceil(max(logPart))
    params['ax'].set_rlim((0, upper))
204
def multiAppBarChartSettings(ax, plt, index, width, n, tmp, s):
    """Apply log scale, legend, tick labels, axis labels and title to a bar chart.

    ax is the axes to configure and plt the pyplot module (or an object
    with the same xlabel/ylabel/title/tight_layout functions).  Ticks are
    placed at index + width * n / 2 and labelled with the keys of
    tmp[s]['Total'].  s selects the y label and title from statProperties.
    """
    ax.set_yscale('log')
    ax.legend()
    tickPositions = index + width * n / 2
    ax.set_xticks(tickPositions)
    tickNames = tmp[s]['Total'].keys()
    ax.set_xticklabels(tickNames, rotation=50, horizontalalignment='right')
    plt.xlabel("OMP Constructs")
    yLabel = statProperties[s][0]
    plt.ylabel(yLabel)
    chartTitle = statProperties[s][1]
    plt.title(chartTitle)
    plt.tight_layout()
214
def derivedTimerStats(data):
    """Express each timer as a percentage of 'OMP_worker_thread_life'.

    data maps timer names to values (a dict or a pandas Series).  Keys are
    visited in order and processing stops at the first of
    'FOR_static_iterations', 'OMP_PARALLEL_args', 'OMP_set_numthreads' or
    'FOR_dynamic_iterations'.

    Returns:
        A dict mapping each visited key other than 'OMP_worker_thread_life'
        to 100 * value / data['OMP_worker_thread_life'].

    Raises:
        KeyError: if a percentage is needed but data has no
            'OMP_worker_thread_life' entry.
    """
    lifeKey = 'OMP_worker_thread_life'
    stopKeys = ('FOR_static_iterations', 'OMP_PARALLEL_args',
                'OMP_set_numthreads', 'FOR_dynamic_iterations')
    # Look the total up before the loop so the result does not depend on
    # where the entry sits among the keys.  Previously a timer listed before
    # it hit an unbound local.
    totalRuntime = data[lifeKey] if lifeKey in data else None
    stats = {}
    for key in data.keys():
        if key == lifeKey:
            continue
        if key in stopKeys:
            # NOTE(review): this ends the loop rather than skipping one key,
            # so every later key is ignored too - confirm that is intended.
            break
        if totalRuntime is None:
            raise KeyError(lifeKey)
        stats[key] = 100 * data[key] / totalRuntime
    return stats
226
def compPie(data):
    """Split data into "compute" and "non compute" entries and print both.

    data maps statistic names to values.  Names in the fixed list below
    count as compute; every other name counts as non compute.

    Returns:
        A two-element list [compKeys, nonCompKeys] of dicts.
    """
    computeNames = frozenset((
        'OMP_critical', 'OMP_single', 'OMP_serial',
        'OMP_parallel', 'OMP_master', 'OMP_task_immediate',
        'OMP_task_taskwait', 'OMP_task_taskyield', 'OMP_task_taskgroup',
        'OMP_task_join_bar', 'OMP_task_plain_bar',
    ))
    compKeys = {}
    nonCompKeys = {}
    for key in data.keys():
        target = compKeys if key in computeNames else nonCompKeys
        target[key] = data[key]
    # One pre-formatted string so the output is the same on Python 2 and 3.
    print("comp keys: %s \n\n non comp keys: %s" % (compKeys, nonCompKeys))
    return [compKeys, nonCompKeys]
240
def drawMainPie(data, filebase, colors):
    """Draw the compute / non compute pie chart and save it.

    data is a two-element sequence of dicts (as returned by compPie); the
    sum of each dict's values is one slice.  The figure is saved under the
    name filebase + "_main_pie".
    """
    computeTotal = sum(data[0].values())
    otherTotal = sum(data[1].values())
    sliceSizes = [computeTotal, otherTotal]
    noOffsets = [0, 0]
    legendText = ["Compute - %.2f" % computeTotal,
                  "Non Compute - %.2f" % otherTotal]
    pieParts = plt.pie(sliceSizes, noOffsets, colors=colors, startangle=90)
    plt.title("Time Division")
    plt.axis('equal')
    # pieParts[0] holds the wedge patches used as legend handles.
    plt.legend(pieParts[0], legendText, loc='best',
               bbox_to_anchor=(-0.1, 1), fontsize=16)
    plt.savefig(filebase + "_main_pie", bbox_inches='tight')
250
def drawSubPie(data, tag, filebase, colors):
    """Draw a pie chart with one slice per entry of data and save it.

    data maps names to values.  Each legend entry shows the name and its
    share of the sum of all values as a percentage.  The title shows tag
    and that sum.  The figure is saved under filebase + "_" + tag.
    """
    # Materialise the views: on Python 3 keys()/values() cannot be indexed
    # or assigned into.
    names = list(data.keys())
    sizes = list(data.values())
    total = sum(sizes)
    labels = []
    for name, size in zip(names, sizes):
        # An all-zero (or empty) data set would otherwise divide by zero.
        share = 100 * size / total if total else 0.0
        labels.append(name + " - %.2f" % share)
    explode = [0] * len(sizes)
    patches = plt.pie(sizes, explode=explode, colors=colors, startangle=90)
    plt.title("%s(Percentage of Total: %.2f)" % (tag, total))
    plt.tight_layout()
    plt.axis('equal')
    # patches[0] holds the wedge patches used as legend handles.
    plt.legend(patches[0], labels, loc='best', bbox_to_anchor=(-0.1, 1),
               fontsize=16)
    plt.savefig(filebase + "_" + tag, bbox_inches='tight')
267
def main():
    """Parse the command line and summarise every stats file given on it.

    For each table in interestingStats and each input file:
      * "counters": counters are normalised by the mean
        OMP_worker_thread_life timer and a radar chart is saved;
      * "timers": rows whose name starts with "Total" are dropped, the
        derived percentages are computed, three pie charts are saved and
        the percentages are written to derivedStats_<filebase>.csv.
    Files that readFile cannot read are skipped.
    """
    parser = argparse.ArgumentParser(description='''This script takes a list
    of files containing each of which contain output from a stats-gathering
    enabled OpenMP runtime library.  Each stats file is read, parsed, and
    used to produce a summary of the statistics''')
    parser.add_argument('files', nargs='+',
                        help='files to parse which contain stats-gathering output')
    command_args = parser.parse_args()
    colors = ['orange', 'b', 'r', 'yellowgreen', 'lightsage', 'lightpink',
              'green', 'purple', 'yellow', 'cyan', 'mediumturquoise',
              'olive']
    stats = {}
    matplotlib.rcParams.update({'font.size': 22})
    for s in interestingStats:
        fig, ax = plt.subplots()

        for n, f in enumerate(command_args.files):
            filebase = os.path.splitext(f)[0]
            tmp = readFile(f)
            if tmp is None:
                # readFile has already reported the problem.
                continue
            data = tmp[s]['Total']
            if s == 'counters':
                elapsedTime = tmp["timers"]["Mean"]["OMP_worker_thread_life"]
                normalizeValues(tmp["counters"], "SampleCount",
                                elapsedTime / 1.e9)
                # Radar chart of the counter totals.
                params = setRadarFigure(data.keys())
                chartType = "radar"
                # Wrap around the palette when there are more files than
                # colours.
                drawRadarChart(data, s, filebase, params,
                               colors[n % len(colors)])
                plt.savefig(filebase + "_" + s + "_" + chartType,
                            bbox_inches='tight')
            elif s == 'timers':
                print("overheads in " + filebase)
                # Prevent repetition by removing rows such as
                # Total_OMP_work: Total_OMP_work['Total'] is the same as
                # OMP_work['Total'].  Iterate over a copy of the keys
                # because entries are deleted inside the loop.
                for key in list(data.keys()):
                    if key[0:5] == 'Total':
                        del data[key]
                stats[filebase] = derivedTimerStats(data)
                dataSubSet = compPie(stats[filebase])
                drawMainPie(dataSubSet, filebase, colors)
                plt.figure(0)
                drawSubPie(dataSubSet[0], "Computational Time", filebase,
                           colors)
                plt.figure(1)
                drawSubPie(dataSubSet[1], "Non Computational Time", filebase,
                           colors)
                # A separate name keeps the loop variable f intact.
                with open('derivedStats_{}.csv'.format(filebase), 'w') as out:
                    out.write('================={}====================\n'.format(filebase))
                    # list() because dict.items() is a view on Python 3.
                    out.write(pd.DataFrame(list(stats[filebase].items())).to_csv() + '\n')
    plt.close()


if __name__ == "__main__":
    main()
def radar_factory(num_vars, frame='circle')
def derivedTimerStats(data)
def readFile(fname)
def compPie(data)
def setRadarFigure(titles)
def drawChart(data, kind, filebase)
def drawSubPie(data, tag, filebase, colors)
def drawMainPie(data, filebase, colors)
def normalizeValues(data, countField, factor)
def drawRadarChart(data, kind, filebase, params, color)
def multiAppBarChartSettings(ax, plt, index, width, n, tmp, s)