14 In the tools collection all plotting tools are gathered.
21 import matplotlib.pyplot
as plt
24 from scipy.stats
import chisqprob
def set_axis_label_range(ax, new_start, new_end, n_labels=5, axis=1, to_flat=None):
    """
    Set the tick labels of an axis to a different value range.

    ``n_labels`` ticks are spread evenly over the current x-limits of ``ax``
    and labelled with values running from ``new_start`` to ``new_end``,
    formatted with two decimals.

    :param ax:          axis object
    :param new_start:   New start value
    :param new_end:     New end value
    :param n_labels:    N labels
    :param axis:        default is x axis 1, any other value relabels the y axis
    :param to_flat:     Flat transformation object for getting non linear values on the axis
    """
    # Tick positions are always derived from the x-limits, also for the y axis.
    start, end = ax.get_xlim()

    # linspace yields exactly n_labels points including both end points.
    # The former arange(start, end, float_step) + append(end) could gain or
    # lose a point through floating point rounding and divided by zero for
    # n_labels == 1.
    label_position = np.linspace(start, end, n_labels)

    if to_flat is None:
        # Plain linear interpolation between the new end points.
        new_labels = np.linspace(new_start, new_end, n_labels)
    else:
        # Non linear case: look the label values up through the flat transformation.
        assert isinstance(to_flat, transform.ToFlat)
        new_labels = [to_flat.get_x(x) for x in np.linspace(0, 1, n_labels)]
        # Pin the outermost labels to the extrema stored on the transformation.
        new_labels[-1] = to_flat.max
        new_labels[0] = to_flat.min

    tick_labels = ["%.2f" % value for value in new_labels]

    # NOTE(review): the branch statement was not visible in the reviewed
    # chunk; reconstructed from the docstring ("default is x axis 1") and
    # from the caller that passes axis=0 for the y axis.
    if axis == 1:
        ax.set_xticks(label_position)
        ax.set_xticklabels(tick_labels)
    else:
        ax.set_yticks(label_position)
        ax.set_yticklabels(tick_labels)
def draw_flat_correlation(x, y, ax=None, draw_label=True, width=5):
    """
    This function draws a flat correlation distribution.
    Both x and y have to be equally sized and are transformed to a flat distribution.

    The flattened values are filled into a 2D histogram and every bin is shown
    as its deviation from the expected uniform population in units of the
    expected statistical error, on a colour scale clipped to [-5, 5].

    :param x:           dist x, pandas Series
    :param y:           dist y, pandas Series
    :param ax:          axis object if drawn in a subplot
    :param draw_label:  draw the labels of the distribution (only works with pandas Series)
    :param width:       width of the plot, default 5
    :return:            the image object created by ``ax.imshow``
                        (NOTE(review): the return statement was not visible
                        in the reviewed chunk)
    """
    # Imported locally: scipy.stats.chisqprob has been removed from SciPy,
    # the chi2 survival function is its documented replacement.
    from scipy.stats import distributions

    not_on_axes = ax is None
    if not_on_axes:
        fig, ax = create_figure(width=width, ratio=7 / 6.)

    # `pd.Series or np.array` always evaluated to pd.Series, so only Series
    # ever passed this check; it is now spelled out explicitly.
    assert isinstance(x, pd.Series), 'Argument of wrong type!'
    assert isinstance(y, pd.Series), 'Argument of wrong type!'
    x_val = x.values
    y_val = y.values

    # Flat transformations for both distributions.
    # NOTE(review): the fit calls were not visible in the reviewed chunk;
    # presumably ToFlat has to be fitted before transform() - confirm.
    tx = transform.ToFlat()
    ty = transform.ToFlat()
    tx.fit(x_val)
    ty.fit(y_val)

    # Expected population per bin and its statistical error for a flat 2D distribution.
    n_bins = transform.get_optimal_bin_size(min(len(x), len(y)))
    n_bins = int(math.sqrt(n_bins) * 2)
    nexp = len(x) / n_bins ** 2
    nerr = math.sqrt(nexp)

    # histogram2d returns (counts, x_edges, y_edges); only the counts are needed.
    counts, _, _ = np.histogram2d(tx.transform(x_val), ty.transform(y_val), bins=(n_bins, n_bins))

    # Deviation from the flat expectation in units of the expected error.
    a = (counts - nexp) / nerr

    # Draw the matrix
    im = ax.imshow(a.T, interpolation='nearest', vmin=-5, vmax=5)

    if not_on_axes:
        print("Printing colorbar")
        plt.colorbar(im, fraction=0.046, pad=0.04)
        set_axis_label_range(ax, x.min(), x.max(), to_flat=tx)
        set_axis_label_range(ax, y.min(), y.max(), axis=0, to_flat=ty)
    else:
        # Inside a subplot grid the tick labels are dropped.
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    if draw_label:
        ax.set_xlabel(x.name)
        ax.set_ylabel(y.name)

    # Overall chi2 of the flat hypothesis: sum of the squared deviations.
    chi2 = float(np.sum(a * a))
    n_dof = n_bins * n_bins - ((n_bins - 1) + (n_bins - 1) + 1)
    proba = distributions.chi2.sf(chi2, n_dof)

    # NOTE(review): the guard around the title was not visible in the
    # reviewed chunk; the title is assumed to be drawn for standalone plots only.
    if not_on_axes:
        ax.set_title("Probability of flat hypothesis %.2f%%" % (proba * 100))

    return im
class ProfilePlot(object):
    """ Basic Profile plot

    Creates the profile histogram from x and y distributions.
    It plots mean(y) in bins of x.

    Attributes:
        x_axis (array) : Binning in x (bin edges)
        mean (list)    : Mean of y in bin x
        err (list)     : Std of Mean y in bin x
        label (string) : Matplotlib label for the plot
    """

    def __init__(self, x, y, x_axis=None, n_bins=None, label=None):
        """ Calculate the profile of y in bins of x

        :param x:       Distribution in x
        :param y:       Distribution in y
        :param n_bins:  (optional) n bins in x, is set automatically if not provided
        :param x_axis:  binning for the x-axis
        :param label:   Matplotlib label for the plot
        """
        # Plain arrays make the boolean masks below purely positional and
        # allow lists as input.
        x = np.asarray(x)
        y = np.asarray(y)

        # NOTE(review): the statements deciding between x_axis and n_bins
        # were not visible in the reviewed chunk; an explicit x_axis is
        # assumed to take precedence over n_bins - confirm.
        if x_axis is None:
            if n_bins is not None:
                x_axis = n_bins
            else:
                x_axis = transform.get_optimal_bin_size(len(x))

        # Only the bin edges of the histogram are kept.
        _, self.x_axis = np.histogram(x, x_axis)

        self.mean = []
        self.err = []
        self.label = label

        # Calculating the profile histogram
        last_bin = len(self.x_axis) - 2
        edge_pairs = zip(self.x_axis[:-1], self.x_axis[1:])
        for index, (lower, upper) in enumerate(edge_pairs):
            # Half-open bins [lower, upper), the last bin also includes its
            # upper edge - the same convention np.histogram uses. Strict
            # comparisons on both sides dropped every value sitting on an
            # edge, which always includes min(x) and max(x).
            if index == last_bin:
                in_bin = (x >= lower) & (x <= upper)
            else:
                in_bin = (x >= lower) & (x < upper)

            y_in_bin = y[in_bin]
            n_y_in_bin = len(y_in_bin)
            if n_y_in_bin == 0:
                # Empty bins are drawn at zero with no error.
                self.mean.append(0)
                self.err.append(0)
            else:
                self.mean.append(np.mean(y_in_bin))
                self.err.append(np.sqrt(np.var(y_in_bin) / n_y_in_bin))

    # NOTE(review): the signature line of draw() was not visible in the
    # reviewed chunk; the default colour is an assumption.
    def draw(self, color='black'):
        """ Draw the profile as error bars at the bin centers

        :param color:   matplotlib color
        """
        bin_centers = (self.x_axis[1:] + self.x_axis[:-1]) / 2.0
        plt.errorbar(bin_centers, self.mean, color=color, yerr=self.err,
                     linewidth=2, ecolor=color, label=self.label, fmt='.')
def draw_flat_corr_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
    """
    Draws a matrix of flat correlation plots for all column pairs of a
    DataFrame, with the histogram of each column on the diagonal.

    :param df:          DataFrame of the input data
    :param pdf:         optional, file to save
    :param tight:       tight layout, be careful
    :param col_numbers: switch between numbers or names for the columns
    :param labels:      optional, list of latex labels
    :param fontsize:    size of the labels
    :param size:        edge length of the (square) figure
    """
    assert isinstance(df, pd.DataFrame), 'Argument of wrong type!'

    n_vars = df.shape[1]

    # NOTE(review): the default for labels was not visible in the reviewed
    # chunk; labels[j] is indexed below, so the column names are assumed.
    if labels is None:
        labels = df.columns

    # squeeze=False keeps `axes` two dimensional for a single column as well.
    fig, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size), squeeze=False)

    last = n_vars - 1
    for i, row in enumerate(axes):
        # Positional column access; DataFrame.ix has been removed from pandas.
        column_i = df.iloc[:, i]
        for j, ax in enumerate(row):
            if i == j:
                # Diagonal: plain histogram of the variable, drawn on this
                # axis directly instead of the implicit current axes.
                ax.hist(column_i.values, transform.get_optimal_bin_size(len(df)), color="gray", histtype='step')
                ax.set_yticklabels([])
                set_axis_label_range(ax, column_i.min(), column_i.max(), n_labels=3)
            else:
                draw_flat_correlation(column_i, df.iloc[:, j], ax=ax, draw_label=False)

            # Integers are compared by value; `is` only worked by accident
            # of CPython's small-int cache.
            if i == last and j != last:
                plt.setp(ax.get_xticklabels(), visible=False)

            ax.xaxis.set_label_coords(0.5, -0.15)

    # NOTE(review): the use of `tight` was not visible in the reviewed chunk.
    if tight:
        plt.tight_layout()

    # Common outer labels: x labels on the bottom row, y labels on the first column.
    # NOTE(review): the conditions selecting the outer axes were not visible
    # in the reviewed chunk.
    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            if i == last:
                if col_numbers:
                    ax.set_xlabel("%d" % j)
                else:
                    ax.set_xlabel(labels[j], fontsize=fontsize)
            if j == 0:
                if col_numbers:
                    ax.set_ylabel("%d" % i)
                else:
                    ax.set_ylabel(labels[i], fontsize=fontsize)

    # NOTE(review): the saving code was not visible in the reviewed chunk;
    # `pdf` is assumed to offer savefig() like matplotlib's PdfPages - confirm.
    if pdf is not None:
        pdf.savefig(fig)
        plt.close(fig)
249 def draw_fancy_correlation_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
251 Draws a colored correlation matrix with a profile plot overlay.
253 :param df: DataFrame of the input data
254 :param pdf: optional, file to save
255 :param tight: tight layout, be carefult
256 :param col_numbers: swith bwtween numbers or names for the clumns
257 :param labels: optional, list of latex labels
258 :param fontsize: size of the labels
263 assert isinstance(df, pd.DataFrame),
'Argument of wrong type!'
265 n_vars = np.shape(df)[1]
270 corr = df.corr().values
271 norm = matplotlib.colors.Normalize(vmin=-1, vmax=1)
273 cma = plt.cm.ScalarMappable(norm=norm, cmap=color)
275 fig, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size))
276 for i, row
in zip(list(range(n_vars)), axes):
277 for j, ax
in zip(list(range(n_vars)), row):
280 plt.hist(df.ix[:, i].values, transform.get_optimal_bin_size(len(df)), color=
"gray", histtype=
'step')
282 ax.set_yticklabels([])
283 set_axis_label_range(ax, df.ix[:, i].min(), df.ix[:, i].max(), n_labels=3)
287 h =
ProfilePlot(df.ix[:, i].values, df.ix[:, j].values, label=
'data', n_bins=10)
288 h.draw(color=
"white")
290 x_middle = (plt.xlim()[1] + plt.xlim()[0]) / 2.
291 y_middle = (plt.ylim()[1] + plt.ylim()[0]) / 2.
293 ax.text(x_middle, y_middle,
"$%.3f$" % corr[i][j], fontsize=24, va=
'center', ha=
'center')
295 ax.patch.set_facecolor(cma.to_rgba(corr[i][j]))
297 ax.set_yticklabels([])
298 ax.set_xticklabels([])
300 if i
is n_vars - 1
and j
is not n_vars - 1:
301 plt.setp(ax.get_xticklabels(), visible=
False)
304 ax.xaxis.set_label_coords(0.5, -0.15)
310 for i, row
in zip(list(range(n_vars)), axes):
311 for j, ax
in zip(list(range(n_vars)), row):
314 ax.set_xlabel(
"%d" % j)
316 ax.set_xlabel(labels[j], fontsize=fontsize)
319 ax.set_ylabel(
"%d" % i)
321 ax.set_ylabel(labels[i], fontsize=fontsize)