LLVM OpenMP 22.0.0git
summarizeStats.py
Go to the documentation of this file.
1#!/usr/bin/env python
2
3
4import pandas as pd
5import numpy as np
6import re
7import sys
8import os
9import argparse
10import matplotlib
11from matplotlib import pyplot as plt
12from matplotlib.projections.polar import PolarAxes
13from matplotlib.projections import register_projection
14
15"""
16Read the stats file produced by the OpenMP runtime
17and produce a processed summary
18
19The radar_factory original code was taken from
20matplotlib.org/examples/api/radar_chart.html
21We added support to handle negative values for radar charts
22"""
23
def radar_factory(num_vars, frame='circle'):
    """Register the 'radar' projection for a chart with num_vars axes.

    Args:
        num_vars: Number of axes (spokes) of the radar chart.
        frame: Shape of the frame around the axes, 'circle' or 'polygon'.

    Returns:
        A numpy array holding the num_vars evenly spaced axis angles
        (in radians).

    Raises:
        ValueError: If frame is neither 'circle' nor 'polygon'.
    """
    # Evenly spaced axis angles; the last angle stops one step short of a
    # full turn so the first axis is not repeated.
    theta = 2*np.pi * np.linspace(0, 1-1./num_vars, num_vars)
    # NOTE: the rotation that would put the first axis at the top
    # (theta += np.pi/2) is disabled.

    def draw_poly_frame(self, x0, y0, r):
        """Return a polygon frame with one vertex per axis angle."""
        # TODO: use transforms to convert (x, y) to (r, theta)
        verts = [(r*np.cos(t) + x0, r*np.sin(t) + y0) for t in theta]
        return plt.Polygon(verts, closed=True, edgecolor='k')

    def draw_circle_frame(self, x0, y0, r):
        """Return a circular frame centred on (x0, y0) with radius r."""
        return plt.Circle((x0, y0), r)

    frame_dict = {'polygon': draw_poly_frame, 'circle': draw_circle_frame}
    if frame not in frame_dict:
        raise ValueError("unknown value for `frame`: %s" % frame)

    class RadarAxes(PolarAxes):
        """
        Class for creating a radar chart (a.k.a. a spider or star chart)

        http://en.wikipedia.org/wiki/Radar_chart
        """
        name = 'radar'
        # use 1 line segment to connect specified points
        RESOLUTION = 1
        # frame-drawing method selected by the `frame` argument
        draw_frame = frame_dict[frame]

        def fill(self, *args, **kwargs):
            """Override fill so that line is closed by default"""
            closed = kwargs.pop('closed', True)
            return super(RadarAxes, self).fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Plot through PolarAxes.plot and return the created lines.

            Explicitly closing each line is currently disabled, so this
            only forwards to the parent implementation.
            """
            lines = super(RadarAxes, self).plot(*args, **kwargs)
            # Hand the lines back to the caller, as Axes.plot does; the
            # previous override dropped them and always returned None.
            return lines

        def set_varlabels(self, labels):
            """Label each axis (one label per angle in theta)."""
            self.set_thetagrids(theta * 180/np.pi, labels, fontsize=14)

        def _gen_axes_patch(self):
            """Build the frame patch, centred at (0.5, 0.5), radius 0.5."""
            x0, y0 = (0.5, 0.5)
            r = 0.5
            return self.draw_frame(x0, y0, r)

    register_projection(RadarAxes)
    return theta
76
77# Code to read the raw stats
def extractSI(s):
    """Turn a string such as "1.5 k" into the float it denotes (1500.0).

    The text is split on whitespace. The first token is the number; when
    there are exactly two tokens, the first character of the second token
    is taken as the SI prefix, otherwise no scaling is applied.
    Raises KeyError for an unrecognised prefix character.
    """
    tokens = s.split()
    value = float(tokens[0])
    if len(tokens) == 2:
        prefix = tokens[1][0]
    else:
        prefix = ' '
    # http://physics.nist.gov/cuu/Units/prefixes.html
    # Prefixes that scale the value up (or leave it unchanged).
    multipliers = {
        'Y': 1e24,
        'Z': 1e21,
        'E': 1e18,
        'P': 1e15,
        'T': 1e12,
        'G': 1e9,
        'M': 1e6,
        'k': 1e3,
        ' ': 1,
    }
    if prefix in multipliers:
        return value * multipliers[prefix]
    # Sub-unit prefixes are applied with a single divide rather than a
    # multiply by 1/x: 1/10 is not exactly representable in IEEE floating
    # point, so multiplying by a reciprocal would round twice.
    divisors = {
        'm': 1e3,
        'u': 1e6,
        'n': 1e9,
        'p': 1e12,
        'f': 1e15,
        'a': 1e18,
        'z': 1e21,
        'y': 1e24,
    }
    return value / divisors[prefix]
107
def readData(f):
    """Read one comma-separated statistics table from the open file f.

    The first line read is the header; its first field names the row-label
    column and the remaining fields become the DataFrame columns. Each
    following line supplies a row label and values that are converted with
    extractSI. Lines starting with '#' are skipped, and reading stops at
    the first blank line or at end of file.

    Returns:
        A pandas DataFrame indexed by the row labels, in file order.
    """
    header = f.readline()
    fieldnames = [x.strip() for x in header.split(',')]
    rowNames = []
    rows = []
    line = f.readline().strip()
    while line != "":
        if line[0] != '#':
            fields = line.split(',')
            rowNames.append(fields[0].strip())
            rows.append([extractSI(v) for v in fields[1:]])
        line = f.readline().strip()
    # DataFrame.from_items was removed in pandas 1.0; the plain constructor
    # builds the same row-oriented frame and keeps the rows in file order.
    return pd.DataFrame(rows, index=rowNames, columns=fieldnames[1:])
123
def readTimers(f):
    """Skip the preamble of the stats file, then read the timers table.

    Lines starting with '#', blank lines and the two banner lines
    ("Statistics on exit" and "Aggregate for all threads") are consumed.
    The first other line is left in place so that readData sees it as the
    table header. f must support tell() and seek().
    """
    banners = ("Statistics on exit", "Aggregate for all threads")
    while True:
        position = f.tell()
        line = f.readline()
        if line == "":
            # End of file: nothing to rewind, readData gets an empty table.
            break
        stripped = line.strip()
        if not stripped or stripped.startswith('#') or stripped in banners:
            continue
        # First real line: rewind so readData reads it as the header.
        f.seek(position)
        break
    return readData(f)
133
def readCounters(f):
    """Read the counters table; it is parsed exactly like any other table."""
    counters = readData(f)
    return counters
137
def readFile(fname):
    """Load the statistics stored in fname.

    Returns a dict with the keys "timers" and "counters", or None (after
    printing a message) when an OSError/IOError occurs.
    """
    try:
        with open(fname) as statsFile:
            timers = readTimers(statsFile)
            counters = readCounters(statsFile)
    except (OSError, IOError):
        print("Cannot open " + fname)
        return None
    return {"timers": timers, "counters": counters}
149
def usefulValues(l):
    """Flag each element of l that is neither null nor zero."""
    present = pd.notnull(l)
    nonZero = l != 0.0
    return [isPresent and isNonZero
            for isPresent, isNonZero in zip(present, nonZero)]
153
155 """I.e. values which are null or zero"""
156 return [not p for p in usefulValues(l)]
157
# Names of the statistics tables that main() processes.
interestingStats = ("counters", "timers")
# Per table: (y-axis label, chart title).
statProperties = dict(
    counters=("Count", "Counter Statistics"),
    timers=("Time (ticks)", "Timer Statistics"),
)
162
def drawChart(data, kind, filebase):
    """Plot the "Mean" column of data as a log-scale bar chart with "SD"
    error bars and save it as <filebase>_<kind>."""
    means = data["Mean"]
    means.plot(kind="bar", logy=True, grid=True, colormap="GnBu",
               yerr=data["SD"], ecolor="black")
    plt.xlabel("OMP Constructs")
    # Axis label and title depend on which kind of table is plotted.
    yLabel = statProperties[kind][0]
    plt.ylabel(yLabel)
    chartTitle = statProperties[kind][1]
    plt.title(chartTitle)
    plt.tight_layout()
    outputName = filebase+"_"+kind
    plt.savefig(outputName)
172
def normalizeValues(data, countField, factor):
    """Divide every column of data except countField by factor, in place."""
    rateColumns = []
    for column in data.keys():
        if column != countField:
            rateColumns.append(column)
    data[rateColumns] = data[rateColumns] / factor
176
177
def setRadarFigure(titles):
    """Create a 9x9 figure holding one radar axes labelled with titles.

    Returns a dict with the radar axes under 'ax' and the axis angles
    under 'theta'.
    """
    figure = plt.figure(figsize=(9,9))
    axesRect = [0.1, 0.1, 0.8, 0.8]
    gridLevels = [0.2, 0.4, 0.6, 0.8, 1, 2, 3, 4, 5, 10]
    matplotlib.rcParams.update({'font.size':13})
    # radar_factory registers the 'radar' projection used just below.
    theta = radar_factory(len(titles))
    radarAxes = figure.add_axes(axesRect, projection='radar')
    radarAxes.set_rgrids(gridLevels)
    radarAxes.set_varlabels(titles)
    # Annotation placed at the angle of the third axis, radius 1.
    radarAxes.text(theta[2], 1, "Linear->Log", horizontalalignment='center',
                   color='green', fontsize=18)
    return dict(ax=radarAxes, theta=theta)
190
191
def drawRadarChart(data, kind, filebase, params, color):
    """Plot data on the radar axes in params as two curves.

    Values >= 1 go onto the log10 curve (blue); all other values go onto
    the linear curve (red). The color argument is not used.
    """
    # Zero-filled copies with the same labels as data.
    logPart = data * 0
    linearPart = data * 0
    for name in data.keys():
        value = data[name]
        if value >= 1:
            logPart[name] = np.log10(value)
            continue
        linearPart[name] = (value)
    radarAxes = params['ax']
    angles = params['theta']
    labelBase = filebase+"_"+kind
    radarAxes.plot(angles, logPart, color='b', label=labelBase+"_log")
    radarAxes.plot(params['theta'], linearPart, color='r',
                   label=labelBase+"_linear")
    params['ax'].legend(loc='best', bbox_to_anchor=(1.4,1.2))
    upperLimit = np.ceil(max(logPart))
    params['ax'].set_rlim((0, upperLimit))
205
def multiAppBarChartSettings(ax, plt, index, width, n, tmp, s):
    """Apply the shared axis, tick and title settings of a grouped bar
    chart for the statistics table named s."""
    ax.set_yscale('log')
    ax.legend()
    # Tick positions are shifted by half the total width of n bars.
    tickPositions = index + width * n / 2
    ax.set_xticks(tickPositions)
    tickNames = tmp[s]['Total'].keys()
    ax.set_xticklabels(tickNames, rotation=50, horizontalalignment='right')
    plt.xlabel("OMP Constructs")
    yLabel = statProperties[s][0]
    plt.ylabel(yLabel)
    chartTitle = statProperties[s][1]
    plt.title(chartTitle)
    plt.tight_layout()
215
def derivedTimerStats(data):
    """Express each timer as a percentage of 'OMP_worker_thread_life'.

    Keys are visited in the order given by data.keys(). Processing stops
    at the first of the keys listed in stopKeys; that key and everything
    after it are left out of the result.

    Args:
        data: Mapping (dict or pandas Series) from timer name to value.

    Returns:
        A dict mapping timer name to 100 * value / total runtime.

    Raises:
        KeyError: If a timer has to be converted but data has no
            'OMP_worker_thread_life' entry.
    """
    lifeKey = 'OMP_worker_thread_life'
    stopKeys = ('FOR_static_iterations', 'OMP_PARALLEL_args',
                'OMP_set_numthreads', 'FOR_dynamic_iterations')
    keys = list(data.keys())
    # Look the total up before the loop so the result does not depend on
    # where the lifetime entry sits in the key order.
    totalRuntime = data[lifeKey] if lifeKey in keys else None
    stats = {}
    for key in keys:
        if key == lifeKey:
            continue
        if key in stopKeys:
            break
        if totalRuntime is None:
            raise KeyError(lifeKey)
        stats[key] = 100 * data[key] / totalRuntime
    return stats
227
def compPie(data):
    """Split data into [compute entries, non-compute entries].

    Both resulting dicts are printed and then returned as a two-element
    list.
    """
    computeNames = ('OMP_critical', 'OMP_single', 'OMP_serial',
                    'OMP_parallel', 'OMP_master', 'OMP_task_immediate',
                    'OMP_task_taskwait', 'OMP_task_taskyield',
                    'OMP_task_taskgroup', 'OMP_task_join_bar',
                    'OMP_task_plain_bar')
    compKeys = {}
    nonCompKeys = {}
    for name in data.keys():
        bucket = compKeys if name in computeNames else nonCompKeys
        bucket[name] = data[name]
    print("comp keys:", compKeys, "\n\n non comp keys:", nonCompKeys)
    return [compKeys, nonCompKeys]
241
def drawMainPie(data, filebase, colors):
    """Draw the compute vs. non-compute pie and save it as
    <filebase>_main_pie.

    data is a two-element sequence of dicts: compute entries first,
    non-compute entries second.
    """
    computeTotal = sum(data[0].values())
    nonComputeTotal = sum(data[1].values())
    sizes = [computeTotal, nonComputeTotal]
    explode = [0,0]
    labels = ["Compute - " + "%.2f" % computeTotal,
              "Non Compute - " + "%.2f" % nonComputeTotal]
    pieParts = plt.pie(sizes, explode, colors=colors, startangle=90)
    plt.title("Time Division")
    plt.axis('equal')
    # pieParts[0] holds the wedge patches the legend refers to.
    wedges = pieParts[0]
    plt.legend(wedges, labels, loc='best', bbox_to_anchor=(-0.1,1),
               fontsize=16)
    plt.savefig(filebase+"_main_pie", bbox_inches='tight')
251
def drawSubPie(data, tag, filebase, colors):
    """Draw a pie chart of the entries in data and save it as
    <filebase>_<tag>.

    Args:
        data: Dict mapping entry name to its size.
        tag: Chart title prefix, also used in the output file name.
        filebase: Prefix of the output file name.
        colors: Colors passed on to plt.pie.
    """
    # dict views cannot be indexed or assigned to on Python 3, so work on
    # list copies.
    names = list(data.keys())
    sizes = list(data.values())
    total = sum(sizes)
    explode = [0] * len(sizes)
    labels = []
    for name, size in zip(names, sizes):
        # Guard against an all-zero (or empty) data set.
        percent = 100 * size / total if total else 0.0
        labels.append(name + " - %.2f" % percent)
    patches = plt.pie(sizes, explode=explode, colors=colors, startangle=90)
    plt.title(tag+"(Percentage of Total:"+(" %.2f" % total)+")")
    plt.tight_layout()
    plt.axis('equal')
    plt.legend(patches[0], labels, loc='best', bbox_to_anchor=(-0.1,1), fontsize=16)
    plt.savefig(filebase+"_"+tag, bbox_inches='tight')
268
def main():
    """Parse the command line and summarize every stats file given.

    For each table in interestingStats and each input file:
      * "counters": normalize the counters by the mean worker thread
        lifetime and save a radar chart of the 'Total' column;
      * "timers": drop the 'Total*' rows, derive percentages of the
        total runtime, save the pie charts and write
        derivedStats_<filebase>.csv.
    Files that readFile cannot read are skipped.
    """
    parser = argparse.ArgumentParser(description='''This script takes a list
    of files containing each of which contain output from a stats-gathering
    enabled OpenMP runtime library. Each stats file is read, parsed, and
    used to produce a summary of the statistics''')
    parser.add_argument('files', nargs='+',
                        help='files to parse which contain stats-gathering output')
    command_args = parser.parse_args()
    colors = ['orange', 'b', 'r', 'yellowgreen', 'lightsage', 'lightpink',
              'green', 'purple', 'yellow', 'cyan', 'mediumturquoise',
              'olive']
    stats = {}
    matplotlib.rcParams.update({'font.size':22})
    for s in interestingStats:
        fig, ax = plt.subplots()
        n = 0

        for fname in command_args.files:
            filebase = os.path.splitext(fname)[0]
            tmp = readFile(fname)
            if tmp is None:
                # readFile already reported the problem; skip this file
                # instead of failing on tmp[s] below.
                continue
            data = tmp[s]['Total']
            if s == 'counters':
                elapsedTime = tmp["timers"]["Mean"]["OMP_worker_thread_life"]
                normalizeValues(tmp["counters"], "SampleCount",
                                elapsedTime / 1.e9)
                # Plotting radar charts
                params = setRadarFigure(data.keys())
                chartType = "radar"
                # Wrap around so more files than colors cannot overflow.
                drawRadarChart(data, s, filebase, params,
                               colors[n % len(colors)])
                # radar Charts finish here
                plt.savefig(filebase + "_" + s + "_" + chartType, bbox_inches="tight")
            elif s == "timers":
                print("overheads in " + filebase)
                numThreads = tmp[s]["SampleCount"]["Total_OMP_parallel"]
                # Prevent repetition by removing rows such as
                # Total_OMP_work, whose 'Total' equals that of OMP_work.
                # Iterate over a snapshot because rows are deleted.
                for key in list(data.keys()):
                    if key.startswith('Total'):
                        del data[key]
                stats[filebase] = derivedTimerStats(data)
                dataSubSet = compPie(stats[filebase])
                drawMainPie(dataSubSet, filebase, colors)
                plt.figure(0)
                drawSubPie(dataSubSet[0], "Computational Time", filebase, colors)
                plt.figure(1)
                drawSubPie(dataSubSet[1], "Non Computational Time", filebase, colors)
                with open('derivedStats_{}.csv'.format(filebase), 'w') as out:
                    out.write('================={}====================\n'.format(filebase))
                    out.write(pd.DataFrame(list(stats[filebase].items())).to_csv()+'\n')
            n += 1
    plt.close()
322
323if __name__ == "__main__":
324 main()
drawChart(data, kind, filebase)
normalizeValues(data, countField, factor)
radar_factory(num_vars, frame='circle')