Belle II Software  release-06-01-15
tools.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 """ Tools collection
13 
14 In the tools collection all plotting tools are gathered.
15 
16 """
17 
18 from alignment.fancystuff import transform
19 from alignment.fancystuff.settings import create_figure
20 
21 import matplotlib.pyplot as plt
22 import pandas as pd
23 import numpy as np
24 from scipy.stats import chisqprob
25 import math
26 
27 
def set_axis_label_range(ax, new_start, new_end, n_labels=5, axis=1, to_flat=None):
    """
    Relabel the ticks of an axis so that they show a different value range.

    ``n_labels`` ticks are spread evenly over the current x-limits of ``ax``
    and labelled (two decimals) with values running from ``new_start`` to
    ``new_end``.

    :param ax: axis object
    :param new_start: New start value (label of the first tick)
    :param new_end: New end value (label of the last tick)
    :param n_labels: Number of tick labels
    :param axis: 1 (default) relabels the x axis, any other value the y axis
    :param to_flat: Flat transformation object for getting non linear values on the axis;
        when given, the labels come from ``to_flat.get_x`` evaluated at evenly spaced
        points in [0, 1], with the outer labels pinned to ``to_flat.min`` / ``to_flat.max``
    """
    # NOTE(review): the tick positions are taken from the x-limits even when the
    # y axis is relabelled; this is only equivalent when both axes span the same
    # range (e.g. a square image) - confirm before using it on other plots.
    start, end = ax.get_xlim()

    # linspace always yields exactly n_labels positions (end point included).
    # The previous arange(...) + append(end) could produce an extra element due
    # to floating point rounding and divided by zero for n_labels == 1.
    label_position = np.linspace(start, end, n_labels)

    if to_flat is None:
        # Plain linear interpolation between the new end points
        new_labels = np.linspace(new_start, new_end, n_labels)
    else:
        # Non linear case: look the labels up through the flat transformation
        assert isinstance(to_flat, transform.ToFlat)
        new_labels = [to_flat.get_x(x_flat) for x_flat in np.linspace(0, 1, n_labels)]
        new_labels[-1] = to_flat.max
        new_labels[0] = to_flat.min

    tick_text = ["%.2f" % value for value in new_labels]

    if axis == 1:
        ax.set_xticks(label_position)
        ax.set_xticklabels(tick_text)
    else:
        ax.set_yticks(label_position)
        ax.set_yticklabels(tick_text)
63 
64 
def draw_flat_correlation(x, y, ax=None, draw_label=True, width=5):
    """
    This function draws a flat correlation distribution.
    Both x and y have to be equally sized and are transformed to a flat distribution.
    The 2D histogram of the transformed values is drawn as the deviation of each
    bin from the expected content, in units of the expected error.

    :param x: dist x, pandas Series (a 1D numpy array is converted to a Series)
    :param y: dist y, pandas Series (a 1D numpy array is converted to a Series)
    :param ax: axis object if drawn in a subplot
    :param draw_label: draw the labels of the distribution (uses the Series names)
    :param width: width of the plot, default 5
    :return: the image object returned by ``ax.imshow``
    """
    # scipy.stats.chisqprob was removed from SciPy; chi2.sf is its replacement.
    from scipy.stats import chi2 as chi2_distribution

    not_on_axes = ax is None
    if not_on_axes:
        _, ax = create_figure(width=width, ratio=7 / 6.)

    # "pd.Series or np.array" evaluated to pd.Series only; check both types explicitly
    assert isinstance(x, (pd.Series, np.ndarray)), 'Argument of wrong type!'
    assert isinstance(y, (pd.Series, np.ndarray)), 'Argument of wrong type!'
    if not isinstance(x, pd.Series):
        x = pd.Series(x)
    if not isinstance(y, pd.Series):
        y = pd.Series(y)
    x_val = x.values
    y_val = y.values

    # Flat Distribution
    tx = transform.ToFlat()
    ty = transform.ToFlat()
    tx.fit(x_val)
    ty.fit(y_val)

    # bins and expected events
    n_bins = transform.get_optimal_bin_size(min(len(x), len(y)))
    n_bins = int(math.sqrt(n_bins) * 2)
    nexp = len(x) / n_bins ** 2
    nerr = math.sqrt(nexp)
    counts, _, _ = np.histogram2d(tx.transform(x_val), ty.transform(y_val), bins=(n_bins, n_bins))

    # Transforming the matrix: deviation from the expectation in units of the error
    pulls = (np.asarray(counts) - nexp) / nerr

    # Draw the matrix
    im = ax.imshow(pulls.T, interpolation='nearest', vmin=-5, vmax=5)
    if not_on_axes:
        print("Printing colorbar")
        plt.colorbar(im, fraction=0.046, pad=0.04)
        set_axis_label_range(ax, x.min(), x.max(), to_flat=tx)
        set_axis_label_range(ax, y.min(), y.max(), axis=0, to_flat=ty)
    else:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    if draw_label:
        # Unnamed inputs (e.g. converted numpy arrays) have nothing to show
        if x.name is not None:
            ax.set_xlabel(x.name)
        if y.name is not None:
            ax.set_ylabel(y.name)

    # Calculate overall chi2 error for flat distribution
    chi2_sum = float(np.sum(pulls * pulls))
    n_dof = n_bins * n_bins - ((n_bins - 1) + (n_bins - 1) + 1)
    proba = chi2_distribution.sf(chi2_sum, n_dof)
    if not_on_axes:
        ax.set_title("Probability of flat hypothesis %.2f%%" % (proba * 100))
    return im
130 
131 
class ProfilePlot:

    """ Basic Profile plot

    Creates the profile Histogram from x and y distributions
    It plots mean(y) in bins of x

    Attributes:
        x_axis (array) : Binning in x (bin edges)
        mean (list) : Mean of y in bin x
        err (list) : Std of Mean y in bin x
        label (string) : Matplotlib label for the plot
    """

    def __init__(self, x, y, x_axis=None, n_bins=None, label=None):
        """ init function
        :param x: Distribution in x
        :param y: Distribution in y
        :param n_bins: (optional) n bins in x, is set automatically if not provided;
            takes precedence over x_axis when both are given
        :param x_axis: binning for the x-axis
        :param label: Matplotlib label for the plot
        """
        # Work on plain arrays so that boolean masks behave the same for
        # lists, numpy arrays and pandas Series.
        x = np.asarray(x)
        y = np.asarray(y)

        if n_bins is not None:
            x_axis = n_bins
        elif x_axis is None:
            x_axis = transform.get_optimal_bin_size(len(x))

        # Binning in x
        _, self.x_axis = np.histogram(x, x_axis)

        # Mean of y in bin x
        self.mean = []

        # Std of Mean y in bin x
        self.err = []

        # Matplotlib label for the plot
        self.label = label

        # Calculating the Profile histogram.
        # Bins are half open [low, high) except the last one, which also
        # contains its upper edge - the same convention np.histogram uses.
        # (Strict comparisons on both sides would drop every value sitting on
        # an edge, in particular min(x) and max(x).)
        last_bin = len(self.x_axis) - 2
        for index, (low, high) in enumerate(zip(self.x_axis[:-1], self.x_axis[1:])):
            below_high = (x <= high) if index == last_bin else (x < high)
            y_in_bin = y[(x >= low) & below_high]
            if y_in_bin.size == 0:
                # Empty bins are drawn at zero with no error
                self.mean.append(0)
                self.err.append(0)
            else:
                self.mean.append(np.mean(y_in_bin))
                self.err.append(np.sqrt(np.var(y_in_bin) / y_in_bin.size))

    def draw(self, color='black'):
        """ Draw function
        :param color: matplotlib color
        """
        bin_centers = (self.x_axis[1:] + self.x_axis[:-1]) / 2.0
        plt.errorbar(bin_centers, self.mean, color=color, yerr=self.err,
                     linewidth=2, ecolor=color, label=self.label, fmt='.')
189 
190 
def draw_flat_corr_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
    """
    Draws a matrix of flat correlation plots, one cell per pair of columns.
    The diagonal shows the histogram of the respective column.

    :param df: DataFrame of the input data
    :param pdf: optional, file to save; its ``savefig()`` is called and the figure closed
    :param tight: tight layout, be careful
    :param col_numbers: switch between numbers or names for the columns
    :param labels: optional, list of latex labels
    :param fontsize: size of the labels
    :param size: edge length of the (square) figure
    """
    assert isinstance(df, pd.DataFrame), 'Argument of wrong type!'

    n_vars = np.shape(df)[1]
    last = n_vars - 1

    if labels is None:
        labels = df.columns

    # squeeze=False keeps `axes` two dimensional even for a single column
    _, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size), squeeze=False)
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            # Integers are compared by value (==), not by identity (is).
            if i == j:
                # DataFrame.ix no longer exists in pandas; columns are addressed by position
                column = df.iloc[:, i]
                plt.sca(ax)
                plt.hist(column.values, transform.get_optimal_bin_size(len(df)), color="gray", histtype='step')
                ax.set_yticklabels([])
                set_axis_label_range(ax, column.min(), column.max(), n_labels=3)
            else:
                draw_flat_correlation(df.iloc[:, i], df.iloc[:, j], ax=ax, draw_label=False)

            if i == last and j != last:
                plt.setp(ax.get_xticklabels(), visible=False)

            if i == last:
                ax.xaxis.set_label_coords(0.5, -0.15)

    if tight:
        plt.tight_layout()

    # Common outer label: x labels on the bottom row, y labels on the first column
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            if i == last:
                if col_numbers:
                    ax.set_xlabel("%d" % j)
                else:
                    ax.set_xlabel(labels[j], fontsize=fontsize)
            if j == 0:
                if col_numbers:
                    ax.set_ylabel("%d" % i)
                else:
                    ax.set_ylabel(labels[i], fontsize=fontsize)

    # Without a pdf the figure is left open for the caller to show or save
    if pdf is not None:
        pdf.savefig()
        plt.close()
247 
248 
def draw_fancy_correlation_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
    """
    Draws a colored correlation matrix with a profile plot overlay.
    The diagonal shows the histogram of the respective column; every other cell
    shows the profile plot of the two columns, the correlation coefficient as
    text and a background color encoding the coefficient.

    :param df: DataFrame of the input data
    :param pdf: optional, file to save; its ``savefig()`` is called and the figure closed
    :param tight: tight layout, be careful
    :param col_numbers: switch between numbers or names for the columns
    :param labels: optional, list of latex labels
    :param fontsize: size of the labels
    :param size: edge length of the (square) figure
    """
    # Import the submodule explicitly instead of relying on it being loaded already
    from matplotlib.colors import Normalize

    assert isinstance(df, pd.DataFrame), 'Argument of wrong type!'

    n_vars = np.shape(df)[1]
    last = n_vars - 1

    if labels is None:
        labels = df.columns

    # Correlation coefficients and their mapping onto the color scale [-1, 1]
    corr = df.corr().values
    cma = plt.cm.ScalarMappable(norm=Normalize(vmin=-1, vmax=1), cmap=plt.cm.jet)

    # squeeze=False keeps `axes` two dimensional even for a single column
    _, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size), squeeze=False)
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            plt.sca(ax)
            # Integers are compared by value (==), not by identity (is).
            if i == j:
                # DataFrame.ix no longer exists in pandas; columns are addressed by position
                column = df.iloc[:, i]
                plt.hist(column.values, transform.get_optimal_bin_size(len(df)), color="gray", histtype='step')
                ax.set_yticklabels([])
                set_axis_label_range(ax, column.min(), column.max(), n_labels=3)
            else:
                h = ProfilePlot(df.iloc[:, i].values, df.iloc[:, j].values, label='data', n_bins=10)
                h.draw(color="white")

                # Put the correlation coefficient into the center of the cell
                x_low, x_high = plt.xlim()
                y_low, y_high = plt.ylim()
                x_middle = (x_high + x_low) / 2.
                y_middle = (y_high + y_low) / 2.

                ax.text(x_middle, y_middle, "$%.3f$" % corr[i][j], fontsize=24, va='center', ha='center')

                ax.patch.set_facecolor(cma.to_rgba(corr[i][j]))

                ax.set_yticklabels([])
                ax.set_xticklabels([])

            if i == last and j != last:
                plt.setp(ax.get_xticklabels(), visible=False)

            if i == last:
                ax.xaxis.set_label_coords(0.5, -0.15)

    if tight:
        plt.tight_layout()

    # Common outer label: x labels on the bottom row, y labels on the first column
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            if i == last:
                if col_numbers:
                    ax.set_xlabel("%d" % j)
                else:
                    ax.set_xlabel(labels[j], fontsize=fontsize)
            if j == 0:
                if col_numbers:
                    ax.set_ylabel("%d" % i)
                else:
                    ax.set_ylabel(labels[i], fontsize=fontsize)

    # Without a pdf the figure is left open for the caller to show or save
    if pdf is not None:
        pdf.savefig()
        plt.close()
def draw(self, color='black')
Definition: tools.py:182
label
Matplotlib label for the plot.
Definition: tools.py:169
err
Std of Mean y in bin x.
Definition: tools.py:166
def __init__(self, x, y, x_axis=None, n_bins=None, label=None)
Definition: tools.py:146