13 In the tools collection all plotting tools are gathered. 
   20 import matplotlib.pyplot 
as plt
 
   23 from scipy.stats 
import chisqprob
 
   27 def set_axis_label_range(ax, new_start, new_end, n_labels=5, axis=1, to_flat=None):
 
   29     Set the labels to a different range 
   30     :param ax:          axis object 
   31     :param new_start:   New start value 
   32     :param new_end:     New end value 
   33     :param n_labels:    N labels 
   34     :param axis:        default is x axis 1 
   35     :param to_flat:     Flat transformation object for getting non linear values on the axis 
   38     start, end = ax.get_xlim()
 
   40     label_position = np.append(np.arange(start, end, (end - start) / float(n_labels - 1)), end)
 
   43     new_labels = np.append(np.arange(new_start, new_end, (new_end - new_start) / float(n_labels - 1)), new_end)
 
   46     if to_flat 
is not None:
 
   47         assert isinstance(to_flat, transform.ToFlat)
 
   48         x_on_flat = np.linspace(0, 1, n_labels)
 
   51         for x, i 
in zip(x_on_flat, list(range(0, n_labels))):
 
   52             new_labels.append(to_flat.get_x(x))
 
   53         new_labels[-1] = to_flat.max
 
   54         new_labels[0] = to_flat.min
 
   57         ax.set_xticks(label_position)
 
   58         ax.set_xticklabels([
"%.2f" % i 
for i 
in new_labels])
 
   60         ax.set_yticks(label_position)
 
   61         ax.set_yticklabels([
"%.2f" % i 
for i 
in new_labels])
 
   64 def draw_flat_correlation(x, y, ax=None, draw_label=True, width=5):
 
   66     This function draws a flat correlation distribution. 
   67     Both x an y have to be equally sized and are transformed to a flat distribution. 
   69     :param x:           dist x, pandas Series 
   70     :param y:           dist y, pandas Series 
   71     :param ax:          axis object if drawn in a subplot 
   72     :param draw_label:  draw the labels of the distribution (only works with pandas Series) 
   73     :param width:       width of the plot, default 5 
   76     not_on_axes = 
True if ax 
is None else False 
   79         fig, ax = create_figure(width=width, ratio=7 / 6.)
 
   81     assert isinstance(x, pd.Series 
or np.array), 
'Argument of wrong type!' 
   82     assert isinstance(y, pd.Series 
or np.array), 
'Argument of wrong type!' 
   87     tx = transform.ToFlat()
 
   88     ty = transform.ToFlat()
 
   93     n_bins = transform.get_optimal_bin_size(min(len(x), len(y)))
 
   94     n_bins = int(math.sqrt(n_bins) * 2)
 
   95     nexp = len(x) / n_bins ** 2
 
   96     nerr = math.sqrt(nexp)
 
   97     a = np.histogram2d(tx.transform(x_val), ty.transform(y_val), bins=(n_bins, n_bins))
 
  101     a = (a - nexp) / nerr
 
  104     im = ax.imshow(a.T, interpolation=
'nearest', vmin=-5, vmax=5)
 
  106         print(
"Printing colorbar")
 
  107         plt.colorbar(im, fraction=0.046, pad=0.04)
 
  108         set_axis_label_range(ax, x.min(), x.max(), to_flat=tx)
 
  109         set_axis_label_range(ax, y.min(), y.max(), axis=0, to_flat=ty)
 
  111         ax.set_xticklabels([])
 
  112         ax.set_yticklabels([])
 
  115         ax.set_xlabel(x.name)
 
  116         ax.set_ylabel(y.name)
 
  120     for i 
in range(0, n_bins):
 
  121         for j 
in range(0, n_bins):
 
  123             chi2 += a[i][j] * a[i][j]
 
  125     proba = chisqprob(chi2, n_bins * n_bins - ((n_bins - 1) + (n_bins - 1) + 1))
 
  127         ax.set_title(
"Probability of flat hypothesis %.2f%%" % (proba * 100))
 
  133     """ Basic Profile plot 
  135     Creates the profile Histogram from x and y distrinbutions 
  136     It plots mean(y) in bins of x 
  139         x_axis (array)  : Binning in x 
  140         mean (array)    : Mean of y in bin x 
  141         err (array)     : Std of Mean y in bin x 
  142         label (string)  : Matplotlib label for the plot 
  145     def __init__(self, x, y, x_axis=None, n_bins=None, label=None):
 
  147         :param x:       Distribution in x 
  148         :param y:       Distribution in y 
  149         :param n_bins:  (optional) n bins in x, is set automatically if not provided 
  150         :param x_axis:  binning for the x-axis 
  151         :param label:   Matplotlib label for the plot 
  154             x_axis = transform.get_optimal_bin_size(len(x))
 
  155         if n_bins 
is not None:
 
  159         _, self.
x_axisx_axis = np.histogram(x, x_axis)
 
  171         for last_x, next_x 
in zip(self.
x_axisx_axis[:-1], self.
x_axisx_axis[1:]):
 
  172             bin_range = (x > last_x) & (x < next_x)
 
  173             n_y_in_bin = len(y[bin_range])
 
  175                 self.
meanmean.append(0)
 
  176                 self.
errerr.append(0)
 
  178                 self.
meanmean.append(np.mean(y[bin_range]))
 
  179                 self.
errerr.append(np.sqrt(np.var(y[bin_range]) / n_y_in_bin))
 
  183         :param color: matplotlib color 
  185         bin_centers = (self.
x_axisx_axis[1:] + self.
x_axisx_axis[:-1]) / 2.0
 
  186         plt.errorbar(bin_centers, self.
meanmean, color=color, yerr=self.
errerr,
 
  187                      linewidth=2, ecolor=color, label=self.
labellabel, fmt=
'.')
 
  190 def draw_flat_corr_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
 
  192     :param df:          DataFrame of the input data 
  193     :param pdf:         optional, file to save 
  194     :param tight:       tight layout, be careful 
  195     :param col_numbers: switch between numbers or names for the columns 
  196     :param labels:      optional, list of latex labels 
  197     :param fontsize:    size of the labels 
  199     assert isinstance(df, pd.DataFrame), 
'Argument of wrong type!' 
  201     n_vars = np.shape(df)[1]
 
  206     fig, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size))
 
  207     for i, row 
in zip(list(range(n_vars)), axes):
 
  208         for j, ax 
in zip(list(range(n_vars)), row):
 
  211                 plt.hist(df.ix[:, i].values, transform.get_optimal_bin_size(len(df)), color=
"gray", histtype=
'step')
 
  212                 ax.set_yticklabels([])
 
  213                 set_axis_label_range(ax, df.ix[:, i].min(), df.ix[:, i].max(), n_labels=3)
 
  215                 draw_flat_correlation(df.ix[:, i], df.ix[:, j], ax=ax, draw_label=
False)
 
  217             if i 
is n_vars - 1 
and j 
is not n_vars - 1:
 
  218                 plt.setp(ax.get_xticklabels(), visible=
False)
 
  221                 ax.xaxis.set_label_coords(0.5, -0.15)
 
  227     for i, row 
in zip(list(range(n_vars)), axes):
 
  228         for j, ax 
in zip(list(range(n_vars)), row):
 
  231                     ax.set_xlabel(
"%d" % j)
 
  233                     ax.set_xlabel(labels[j], fontsize=fontsize)
 
  236                     ax.set_ylabel(
"%d" % i)
 
  238                     ax.set_ylabel(labels[i], fontsize=fontsize)
 
  248 def draw_fancy_correlation_matrix(df, pdf=None, tight=False, col_numbers=False, labels=None, fontsize=18, size=12):
 
  250     Draws a colored correlation matrix with a profile plot overlay. 
  252     :param df:          DataFrame of the input data 
  253     :param pdf:         optional, file to save 
  254     :param tight:       tight layout, be careful 
  255     :param col_numbers: switch between numbers or names for the columns 
  256     :param labels:      optional, list of latex labels 
  257     :param fontsize:    size of the labels 
  262     assert isinstance(df, pd.DataFrame), 
'Argument of wrong type!' 
  264     n_vars = np.shape(df)[1]
 
  269     corr = df.corr().values
 
  270     norm = matplotlib.colors.Normalize(vmin=-1, vmax=1)
 
  272     cma = plt.cm.ScalarMappable(norm=norm, cmap=color)
 
  274     fig, axes = plt.subplots(nrows=n_vars, ncols=n_vars, figsize=(size, size))
 
  275     for i, row 
in zip(list(range(n_vars)), axes):
 
  276         for j, ax 
in zip(list(range(n_vars)), row):
 
  279                 plt.hist(df.ix[:, i].values, transform.get_optimal_bin_size(len(df)), color=
"gray", histtype=
'step')
 
  281                 ax.set_yticklabels([])
 
  282                 set_axis_label_range(ax, df.ix[:, i].min(), df.ix[:, i].max(), n_labels=3)
 
  286                 h = 
ProfilePlot(df.ix[:, i].values, df.ix[:, j].values, label=
'data', n_bins=10)
 
  287                 h.draw(color=
"white")
 
  289                 x_middle = (plt.xlim()[1] + plt.xlim()[0]) / 2.
 
  290                 y_middle = (plt.ylim()[1] + plt.ylim()[0]) / 2.
 
  292                 ax.text(x_middle, y_middle, 
"$%.3f$" % corr[i][j], fontsize=24, va=
'center', ha=
'center')
 
  294                 ax.patch.set_facecolor(cma.to_rgba(corr[i][j]))
 
  296                 ax.set_yticklabels([])
 
  297                 ax.set_xticklabels([])
 
  299             if i 
is n_vars - 1 
and j 
is not n_vars - 1:
 
  300                 plt.setp(ax.get_xticklabels(), visible=
False)
 
  303                 ax.xaxis.set_label_coords(0.5, -0.15)
 
  309     for i, row 
in zip(list(range(n_vars)), axes):
 
  310         for j, ax 
in zip(list(range(n_vars)), row):
 
  313                     ax.set_xlabel(
"%d" % j)
 
  315                     ax.set_xlabel(labels[j], fontsize=fontsize)
 
  318                     ax.set_ylabel(
"%d" % i)
 
  320                     ax.set_ylabel(labels[i], fontsize=fontsize)