Belle II Software development
tools.py
1#!/usr/bin/env python3
2
3
10
11""" Tools collection
12
13In the tools collection all plotting tools are gathered.
14
15"""
16
17from alignment.fancystuff import transform
18from alignment.fancystuff.settings import create_figure
19
20import matplotlib.pyplot as plt
21import pandas as pd
22import numpy as np
23from scipy.stats import chisqprob
24import math
25
26
def set_axis_label_range(ax, new_start, new_end, n_labels=5, axis=1, to_flat=None):
    """
    Relabel the ticks of an axis so that they display a different value range.

    ``n_labels`` ticks are spread evenly over the current x-limits of ``ax`` and
    labelled with values running from ``new_start`` to ``new_end`` (two decimals).

    :param ax: axis object
    :param new_start: New start value
    :param new_end: New end value
    :param n_labels: Number of tick labels
    :param axis: 1 (default) relabels the x axis, any other value relabels the y axis
    :param to_flat: Flat transformation object for getting non linear values on the axis
    """
    # NOTE: the tick positions are always derived from the x-limits, also when
    # the y axis is relabelled (axis != 1).
    start, end = ax.get_xlim()

    # linspace always yields exactly n_labels points including both end points.
    # The former arange(...) + append(end) construction could produce one tick
    # too many through floating point rounding and divided by zero for n_labels=1.
    label_position = np.linspace(start, end, n_labels)

    # Plain linear interpolation; only correct if the axis itself is linear.
    new_labels = np.linspace(new_start, new_end, n_labels)

    # Non linear 'correct' case using the CDF as reference
    if to_flat is not None:
        assert isinstance(to_flat, transform.ToFlat)
        new_labels = [to_flat.get_x(x) for x in np.linspace(0, 1, n_labels)]
        # Pin the outermost labels to the extrema stored on the transformation.
        new_labels[-1] = to_flat.max
        new_labels[0] = to_flat.min

    tick_labels = ["%.2f" % value for value in new_labels]
    if axis == 1:
        ax.set_xticks(label_position)
        ax.set_xticklabels(tick_labels)
    else:
        ax.set_yticks(label_position)
        ax.set_yticklabels(tick_labels)
62
63
def draw_flat_correlation(x, y, ax=None, draw_label=True, width=5):
    """
    This function draws a flat correlation distribution.
    Both x and y have to be equally sized and are transformed to a flat distribution.

    Each cell of the drawn matrix shows the deviation of the observed bin content
    from the content expected for uncorrelated variables, in units of the
    expected statistical error (colour scale clipped to +-5).

    :param x: dist x, pandas Series
    :param y: dist y, pandas Series
    :param ax: axis object if drawn in a subplot; a new figure is created if omitted
    :param draw_label: draw the labels of the distribution (only works with pandas Series)
    :param width: width of the plot, default 5
    :return: the image object returned by ``ax.imshow``
    """
    # Imported locally: scipy.stats.chisqprob is no longer available in current
    # SciPy releases; chi2.sf(chisq, df) is its direct equivalent.
    from scipy.stats import chi2 as chi2_distribution

    not_on_axes = ax is None
    if not_on_axes:
        _, ax = create_figure(width=width, ratio=7 / 6.)

    # `pd.Series or np.array` always evaluated to pd.Series, and the code below
    # needs the Series interface (.values, .min(), .name), so check exactly that.
    assert isinstance(x, pd.Series), 'Argument of wrong type!'
    assert isinstance(y, pd.Series), 'Argument of wrong type!'
    x_val = x.values
    y_val = y.values

    # Flat Distribution
    tx = transform.ToFlat()
    ty = transform.ToFlat()
    tx.fit(x_val)
    ty.fit(y_val)

    # bins and expected events
    n_bins = transform.get_optimal_bin_size(min(len(x), len(y)))
    n_bins = int(math.sqrt(n_bins) * 2)
    nexp = len(x) / n_bins ** 2
    nerr = math.sqrt(nexp)
    counts = np.histogram2d(tx.transform(x_val), ty.transform(y_val), bins=(n_bins, n_bins))[0]

    # Transforming the matrix: deviation from the expectation in units of its error
    a = (np.array(counts) - nexp) / nerr

    # Draw the matrix
    im = ax.imshow(a.T, interpolation='nearest', vmin=-5, vmax=5)
    if not_on_axes:
        print("Printing colorbar")
        plt.colorbar(im, fraction=0.046, pad=0.04)
        set_axis_label_range(ax, x.min(), x.max(), to_flat=tx)
        set_axis_label_range(ax, y.min(), y.max(), axis=0, to_flat=ty)
    else:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    if draw_label:
        ax.set_xlabel(x.name)
        ax.set_ylabel(y.name)

    if not_on_axes:
        # Overall chi2 of the flat hypothesis: sum of the squared pulls of all bins.
        chi2 = float(np.sum(a * a))
        n_dof = n_bins * n_bins - ((n_bins - 1) + (n_bins - 1) + 1)
        proba = chi2_distribution.sf(chi2, n_dof)
        ax.set_title("Probability of flat hypothesis %.2f%%" % (proba * 100))
    return im
129
130
class ProfilePlot:
    """ Basic Profile plot

    Creates the profile histogram from x and y distributions.
    It plots mean(y) in bins of x.

    Attributes:
        x_axis (array) : Binning in x
        mean (array)   : Mean of y in bin x
        err (array)    : Std of Mean y in bin x
        label (string) : Matplotlib label for the plot
    """

    def __init__(self, x, y, x_axis=None, n_bins=None, label=None):
        """ init function
        :param x: Distribution in x
        :param y: Distribution in y
        :param n_bins: (optional) n bins in x, is set automatically if not provided;
                       takes precedence over x_axis if both are given
        :param x_axis: binning for the x-axis
        :param label: Matplotlib label for the plot
        """
        x = np.asarray(x)
        y = np.asarray(y)

        if n_bins is not None:
            x_axis = n_bins
        elif x_axis is None:
            x_axis = transform.get_optimal_bin_size(len(x))

        # Binning in x (bin edges as returned by np.histogram)
        _, self.x_axis = np.histogram(x, x_axis)
        # Mean of y in bin x
        self.mean = []
        # Std of Mean y in bin x
        self.err = []
        # Matplotlib label for the plot
        self.label = label

        # Calculating the Profile histogram.
        # Bins are half open [low, high) except for the last one, which also
        # contains its upper edge. This is the np.histogram convention; a strict
        # '>' / '<' test would drop every value lying exactly on a bin edge,
        # including the minimum and maximum of x.
        last_bin = len(self.x_axis) - 2
        for index, (low, high) in enumerate(zip(self.x_axis[:-1], self.x_axis[1:])):
            if index == last_bin:
                in_bin = (x >= low) & (x <= high)
            else:
                in_bin = (x >= low) & (x < high)
            y_in_bin = y[in_bin]
            n_y_in_bin = len(y_in_bin)
            if n_y_in_bin == 0:
                # Empty bins are drawn at zero with zero error.
                self.mean.append(0)
                self.err.append(0)
            else:
                self.mean.append(np.mean(y_in_bin))
                self.err.append(np.sqrt(np.var(y_in_bin) / n_y_in_bin))

    def draw(self, color='black'):
        """ Draw function
        :param color: matplotlib color
        """
        bin_centers = (self.x_axis[1:] + self.x_axis[:-1]) / 2.0
        plt.errorbar(bin_centers, self.mean, color=color, yerr=self.err,
                     linewidth=2, ecolor=color, label=self.label, fmt='.')
188
189
def draw_flat_corr_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
    """
    Draws a matrix of flat correlation plots with the 1D distributions on the diagonal.

    :param df: DataFrame of the input data
    :param pdf: optional, file to save
    :param tight: tight layout, be careful
    :param col_numbers: switch between numbers or names for the columns
    :param labels: optional, list of latex labels
    :param fontsize: size of the labels
    :param size: edge length of the (square) figure
    """
    assert isinstance(df, pd.DataFrame), 'Argument of wrong type!'

    n_vars = df.shape[1]
    last = n_vars - 1

    if labels is None:
        labels = df.columns

    # squeeze=False keeps `axes` two dimensional even for a single column.
    _, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size), squeeze=False)
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            if i == j:
                # Diagonal: plain histogram of the variable.
                # iloc selects strictly by position; the former DataFrame.ix
                # accessor has been removed from pandas.
                column = df.iloc[:, i]
                plt.sca(ax)
                plt.hist(column.values, transform.get_optimal_bin_size(len(df)), color="gray", histtype='step')
                ax.set_yticklabels([])
                set_axis_label_range(ax, column.min(), column.max(), n_labels=3)
            else:
                draw_flat_correlation(df.iloc[:, i], df.iloc[:, j], ax=ax, draw_label=False)

            if i == last and j != last:
                plt.setp(ax.get_xticklabels(), visible=False)

            if i == last:
                ax.xaxis.set_label_coords(0.5, -0.15)

    if tight:
        plt.tight_layout()

    # Common outer label: x labels on the bottom row, y labels on the first column
    for j, ax in enumerate(axes[last]):
        if col_numbers:
            ax.set_xlabel("%d" % j)
        else:
            ax.set_xlabel(labels[j], fontsize=fontsize)
    for i, row in enumerate(axes):
        if col_numbers:
            row[0].set_ylabel("%d" % i)
        else:
            row[0].set_ylabel(labels[i], fontsize=fontsize)

    # Without a pdf the figure is left open so that the caller can show it.
    if pdf is not None:
        pdf.savefig()
        plt.close()
246
247
def draw_fancy_correlation_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
    """
    Draws a colored correlation matrix with a profile plot overlay.

    :param df: DataFrame of the input data
    :param pdf: optional, file to save
    :param tight: tight layout, be careful
    :param col_numbers: switch between numbers or names for the columns
    :param labels: optional, list of latex labels
    :param fontsize: size of the labels
    :param size: edge length of the (square) figure
    """

    # Importing the submodule guarantees that matplotlib.colors is loaded.
    import matplotlib.colors

    assert isinstance(df, pd.DataFrame), 'Argument of wrong type!'

    n_vars = df.shape[1]
    last = n_vars - 1

    if labels is None:
        labels = df.columns

    # Map the correlation coefficient in [-1, 1] onto the jet colour map.
    corr = df.corr().values
    norm = matplotlib.colors.Normalize(vmin=-1, vmax=1)
    color = plt.cm.jet
    cma = plt.cm.ScalarMappable(norm=norm, cmap=color)

    # squeeze=False keeps `axes` two dimensional even for a single column.
    _, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size), squeeze=False)
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            plt.sca(ax)
            if i == j:
                # Diagonal: plain histogram of the variable.
                # iloc selects strictly by position; the former DataFrame.ix
                # accessor has been removed from pandas.
                column = df.iloc[:, i]
                plt.hist(column.values, transform.get_optimal_bin_size(len(df)), color="gray", histtype='step')
                ax.set_yticklabels([])
                set_axis_label_range(ax, column.min(), column.max(), n_labels=3)
            else:
                # Off-diagonal: profile plot on a background coloured by the correlation.
                h = ProfilePlot(df.iloc[:, i].values, df.iloc[:, j].values, label='data', n_bins=10)
                h.draw(color="white")

                x_low, x_high = plt.xlim()
                y_low, y_high = plt.ylim()
                x_middle = (x_high + x_low) / 2.
                y_middle = (y_high + y_low) / 2.

                ax.text(x_middle, y_middle, "$%.3f$" % corr[i][j], fontsize=24, va='center', ha='center')

                ax.patch.set_facecolor(cma.to_rgba(corr[i][j]))

                ax.set_yticklabels([])
                ax.set_xticklabels([])

            if i == last and j != last:
                plt.setp(ax.get_xticklabels(), visible=False)

            if i == last:
                ax.xaxis.set_label_coords(0.5, -0.15)

    if tight:
        plt.tight_layout()

    # Common outer label: x labels on the bottom row, y labels on the first column
    for j, ax in enumerate(axes[last]):
        if col_numbers:
            ax.set_xlabel("%d" % j)
        else:
            ax.set_xlabel(labels[j], fontsize=fontsize)
    for i, row in enumerate(axes):
        if col_numbers:
            row[0].set_ylabel("%d" % i)
        else:
            row[0].set_ylabel(labels[i], fontsize=fontsize)

    # Without a pdf the figure is left open so that the caller can show it.
    if pdf is not None:
        pdf.savefig()
        plt.close()
def draw(self, color='black')
Definition: tools.py:181
label
Matplotlib label for the plot.
Definition: tools.py:168
err
Std of Mean y in bin x.
Definition: tools.py:165
def __init__(self, x, y, x_axis=None, n_bins=None, label=None)
Definition: tools.py:145