Belle II Software  release-08-01-10
tools.py
1 #!/usr/bin/env python3
2 
3 
10 
11 """ Tools collection
12 
13 In the tools collection all plotting tools are gathered.
14 
15 """
16 
17 from alignment.fancystuff import transform
18 from alignment.fancystuff.settings import create_figure
19 
20 import matplotlib.pyplot as plt
21 import pandas as pd
22 import numpy as np
23 from scipy.stats import chisqprob
24 import math
25 
26 
def set_axis_label_range(ax, new_start, new_end, n_labels=5, axis=1, to_flat=None):
    """
    Relabel the ticks of an axis so that they display a different value range.

    ``n_labels`` ticks are spread evenly over the current x-limits of ``ax``.
    They are labelled either with evenly spaced values between ``new_start``
    and ``new_end`` or, when ``to_flat`` is given, with the values obtained
    from ``to_flat.get_x`` at evenly spaced points in [0, 1].

    :param ax: axis object
    :param new_start: New start value (label of the first tick in the linear case)
    :param new_end: New end value (label of the last tick in the linear case)
    :param n_labels: Number of tick labels
    :param axis: 1 (default) relabels the x axis, any other value the y axis
    :param to_flat: Flat transformation object for getting non linear values on the axis
    """
    # NOTE(review): the tick positions are derived from the x-limits even when
    # the y axis is relabelled.  This is only right if both axes span the same
    # range (as for the square histogram images drawn in this file) - confirm
    # before using it on other plots.
    start, end = ax.get_xlim()

    # linspace always yields exactly n_labels points including both ends;
    # arange with a float step may produce one point too many through
    # rounding and divides by zero for n_labels == 1.
    tick_positions = np.linspace(start, end, n_labels)

    if to_flat is None:
        # Plain linear interpolation between the requested end points
        new_labels = np.linspace(new_start, new_end, n_labels)
    else:
        # Non linear 'correct' case using the CDF as reference
        assert isinstance(to_flat, transform.ToFlat)
        new_labels = [to_flat.get_x(quantile) for quantile in np.linspace(0, 1, n_labels)]
        # Pin the outermost labels to the limits stored by the transformation
        new_labels[-1] = to_flat.max
        new_labels[0] = to_flat.min

    tick_texts = ["%.2f" % value for value in new_labels]

    if axis == 1:
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_texts)
    else:
        ax.set_yticks(tick_positions)
        ax.set_yticklabels(tick_texts)
62 
63 
def draw_flat_correlation(x, y, ax=None, draw_label=True, width=5):
    """
    This function draws a flat correlation distribution.
    Both x and y have to be equally sized and are transformed to a flat distribution.

    The transformed values are filled into a square 2D histogram and every bin
    is drawn as its deviation from the expected content, in units of the
    expected statistical error (colour scale clipped to +-5).

    :param x: dist x, pandas Series
    :param y: dist y, pandas Series
    :param ax: axis object if drawn in a subplot; a new figure is created if None
    :param draw_label: draw the labels of the distribution (only works with pandas Series)
    :param width: width of the plot, default 5
    :return: the image object returned by ``ax.imshow``
    """
    # chi2.sf(chi2, ndf) is the survival function, i.e. the value the legacy
    # scipy.stats.chisqprob helper returned; that helper is no longer part of
    # SciPy, so the distribution object is used instead.
    from scipy.stats import chi2 as chi2_distribution

    not_on_axes = ax is None
    if not_on_axes:
        _, ax = create_figure(width=width, ratio=7 / 6.)

    # `.values`, `.name`, `.min()` and `.max()` are used below, so only pandas
    # Series are accepted (the former `pd.Series or np.array` expression
    # evaluated to pd.Series as well).
    assert isinstance(x, pd.Series), 'Argument of wrong type!'
    assert isinstance(y, pd.Series), 'Argument of wrong type!'
    x_val = x.values
    y_val = y.values

    # Flat Distribution
    tx = transform.ToFlat()
    ty = transform.ToFlat()
    tx.fit(x_val)
    ty.fit(y_val)

    # bins and expected events
    n_bins = transform.get_optimal_bin_size(min(len(x), len(y)))
    n_bins = int(math.sqrt(n_bins) * 2)
    nexp = len(x) / n_bins ** 2
    nerr = math.sqrt(nexp)
    counts = np.histogram2d(tx.transform(x_val), ty.transform(y_val), bins=(n_bins, n_bins))[0]

    # Deviation of every bin from the flat expectation in units of its error
    pulls = (np.array(counts) - nexp) / nerr

    # Draw the matrix
    im = ax.imshow(pulls.T, interpolation='nearest', vmin=-5, vmax=5)
    if not_on_axes:
        print("Printing colorbar")
        plt.colorbar(im, fraction=0.046, pad=0.04)
        set_axis_label_range(ax, x.min(), x.max(), to_flat=tx)
        set_axis_label_range(ax, y.min(), y.max(), axis=0, to_flat=ty)
    else:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    if draw_label:
        ax.set_xlabel(x.name)
        ax.set_ylabel(y.name)

    # Calculate overall chi2 error for flat distribution
    chi2 = float(np.sum(pulls * pulls))
    n_dof = n_bins * n_bins - ((n_bins - 1) + (n_bins - 1) + 1)

    proba = chi2_distribution.sf(chi2, n_dof)
    if not_on_axes:
        ax.set_title("Probability of flat hypothesis %.2f%%" % (proba * 100))
    return im
129 
130 
class ProfilePlot():

    """ Basic Profile plot

    Creates the profile histogram from x and y distributions.
    It plots mean(y) in bins of x.

    Attributes:
        x_axis (array)  : Binning in x (bin edges)
        mean (list)     : Mean of y in bin x
        err (list)      : Std of Mean y in bin x
        label (string)  : Matplotlib label for the plot
    """

    def __init__(self, x, y, x_axis=None, n_bins=None, label=None):
        """ init function
        :param x: Distribution in x
        :param y: Distribution in y
        :param n_bins: (optional) n bins in x, is set automatically if not provided;
                       takes precedence over x_axis when both are given
        :param x_axis: binning for the x-axis
        :param label: Matplotlib label for the plot
        """
        # Boolean masks are applied below, so plain sequences are converted
        x = np.asarray(x)
        y = np.asarray(y)

        if n_bins is not None:
            x_axis = n_bins
        elif x_axis is None:
            x_axis = transform.get_optimal_bin_size(len(x))

        # Binning in x
        _, self.x_axis = np.histogram(x, x_axis)

        # Mean of y in bin x
        self.mean = []

        # Std of Mean y in bin x
        self.err = []

        # Matplotlib label for the plot
        self.label = label

        # Calculating the Profile histogram.  The bins follow the np.histogram
        # convention: half open [lower, upper) except for the last one, which
        # also contains its upper edge.  Strict comparisons on both sides would
        # drop every sample sitting on an edge, including min(x) and max(x).
        last_bin = len(self.x_axis) - 2
        for index, (lower, upper) in enumerate(zip(self.x_axis[:-1], self.x_axis[1:])):
            below_upper = (x <= upper) if index == last_bin else (x < upper)
            y_in_bin = y[(x >= lower) & below_upper]
            n_y_in_bin = len(y_in_bin)
            if n_y_in_bin == 0:
                # Empty bins are drawn at zero with no error
                self.mean.append(0)
                self.err.append(0)
            else:
                self.mean.append(np.mean(y_in_bin))
                self.err.append(np.sqrt(np.var(y_in_bin) / n_y_in_bin))

    def draw(self, color='black'):
        """ Draw function
        :param color: matplotlib color
        """
        bin_centers = (self.x_axis[1:] + self.x_axis[:-1]) / 2.0
        plt.errorbar(bin_centers, self.mean, color=color, yerr=self.err,
                     linewidth=2, ecolor=color, label=self.label, fmt='.')
188 
189 
def draw_flat_corr_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
    """
    Draws a matrix of flat correlation plots for all column pairs of a DataFrame.

    The diagonal shows the histogram of each column, every other cell the
    flat correlation plot of the corresponding column pair.

    :param df: DataFrame of the input data
    :param pdf: optional, file to save; must provide ``savefig()``. The figure is closed after saving
    :param tight: tight layout, be careful
    :param col_numbers: switch between numbers or names for the columns
    :param labels: optional, list of latex labels
    :param fontsize: size of the labels
    :param size: edge length of the (square) figure
    """
    assert isinstance(df, pd.DataFrame), 'Argument of wrong type!'

    n_vars = np.shape(df)[1]
    last = n_vars - 1

    if labels is None:
        labels = df.columns

    # squeeze=False keeps `axes` two dimensional even for a single column
    _, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size), squeeze=False)
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            if i == j:
                # Diagonal: plain histogram of the column
                column = df.iloc[:, i]
                plt.sca(ax)
                plt.hist(column.values, transform.get_optimal_bin_size(len(df)), color="gray", histtype='step')
                ax.set_yticklabels([])
                set_axis_label_range(ax, column.min(), column.max(), n_labels=3)
            else:
                draw_flat_correlation(df.iloc[:, i], df.iloc[:, j], ax=ax, draw_label=False)

            if i == last and j != last:
                plt.setp(ax.get_xticklabels(), visible=False)

            if i == last:
                ax.xaxis.set_label_coords(0.5, -0.15)

    if tight:
        plt.tight_layout()

    # Common outer label: x labels on the bottom row, y labels on the first column
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            if i == last:
                if col_numbers:
                    ax.set_xlabel("%d" % j)
                else:
                    ax.set_xlabel(labels[j], fontsize=fontsize)
            if j == 0:
                if col_numbers:
                    ax.set_ylabel("%d" % i)
                else:
                    ax.set_ylabel(labels[i], fontsize=fontsize)

    # Without a target file the figure is left open for the caller
    if pdf is not None:
        pdf.savefig()
        plt.close()
246 
247 
def draw_fancy_correlation_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
    """
    Draws a colored correlation matrix with a profile plot overlay.

    The diagonal shows the histogram of each column. Every other cell shows
    the profile plot of the column pair on a background coloured by the
    correlation coefficient from ``df.corr()``, which is also printed in the cell.

    :param df: DataFrame of the input data
    :param pdf: optional, file to save; must provide ``savefig()``. The figure is closed after saving
    :param tight: tight layout, be careful
    :param col_numbers: switch between numbers or names for the columns
    :param labels: optional, list of latex labels
    :param fontsize: size of the labels
    :param size: edge length of the (square) figure
    """

    import matplotlib.colors

    assert isinstance(df, pd.DataFrame), 'Argument of wrong type!'

    n_vars = np.shape(df)[1]
    last = n_vars - 1

    if labels is None:
        labels = df.columns

    # Map correlation coefficients in [-1, 1] onto the jet colour map
    corr = df.corr().values
    norm = matplotlib.colors.Normalize(vmin=-1, vmax=1)
    color = plt.cm.jet
    cma = plt.cm.ScalarMappable(norm=norm, cmap=color)

    # squeeze=False keeps `axes` two dimensional even for a single column
    _, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size), squeeze=False)
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            plt.sca(ax)
            if i == j:
                # Diagonal: plain histogram of the column
                column = df.iloc[:, i]
                plt.hist(column.values, transform.get_optimal_bin_size(len(df)), color="gray", histtype='step')
                ax.set_yticklabels([])
                set_axis_label_range(ax, column.min(), column.max(), n_labels=3)
            else:
                h = ProfilePlot(df.iloc[:, i].values, df.iloc[:, j].values, label='data', n_bins=10)
                h.draw(color="white")

                # Print the coefficient in the centre of the cell
                x_low, x_high = plt.xlim()
                y_low, y_high = plt.ylim()
                x_middle = (x_high + x_low) / 2.
                y_middle = (y_high + y_low) / 2.

                ax.text(x_middle, y_middle, "$%.3f$" % corr[i][j], fontsize=24, va='center', ha='center')

                ax.patch.set_facecolor(cma.to_rgba(corr[i][j]))

                ax.set_yticklabels([])
                ax.set_xticklabels([])

            if i == last and j != last:
                plt.setp(ax.get_xticklabels(), visible=False)

            if i == last:
                ax.xaxis.set_label_coords(0.5, -0.15)

    if tight:
        plt.tight_layout()

    # Common outer label: x labels on the bottom row, y labels on the first column
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            if i == last:
                if col_numbers:
                    ax.set_xlabel("%d" % j)
                else:
                    ax.set_xlabel(labels[j], fontsize=fontsize)
            if j == 0:
                if col_numbers:
                    ax.set_ylabel("%d" % i)
                else:
                    ax.set_ylabel(labels[i], fontsize=fontsize)

    # Without a target file the figure is left open for the caller
    if pdf is not None:
        pdf.savefig()
        plt.close()
def draw(self, color='black')
Definition: tools.py:181
label
Matplotlib label for the plot.
Definition: tools.py:168
err
Std of Mean y in bin x.
Definition: tools.py:165
def __init__(self, x, y, x_axis=None, n_bins=None, label=None)
Definition: tools.py:145