development/doxygen/transform_8py_source.html

#!/usr/bin/env python3


""" Transformation classes


In this file all classes for the transformation methods are defined.

The base class is Transform.


"""


from alignment.fancystuff.settings import ProTool


import numpy as np

import pandas as pd

from scipy.interpolate import InterpolatedUnivariateSpline


class Transform(ProTool):


    """

    Base Class for the transformations.

    The function _fit() is overwritten by the sub classes.


    Attributes

    ----------

    n_bins : int, optional

        Binning in x, will be set automatically

    max : float

        Maximum of the fitted distribution

    min : float

        Minimum of the fitted distribution

    is_processed : bool

        Status flag

    name : str

        Name of the transformation


    """


    def __init__(self, name="Original", n_bins=None):
        """ Set up an unfitted transformation.

        :param name:    Name of the transformation; it is also appended to
                        ``"Transform."`` and handed to ``ProTool.__init__``
        :param n_bins:  Binning for the transformation; when left at ``None``
                        it is derived from the data length during the fit
        """
        # Binning as configured by the caller (may still be None here).
        self.n_bins = n_bins

        # Placeholder limits and status until a distribution has been fitted.
        self.max = self.min = 0
        self.is_processed = False

        # The plain name is stored before the base class initialiser runs.
        self.name = name
        ProTool.__init__(self, "Transform." + self.name)


    def __init__(self, name="Original", n_bins=None): …


    def _initialise(self, x):
        """
        Prepares the transformation for a fit on ``x``.

        Picks a binning from the length of ``x`` if none was configured and
        stores the maximum and minimum of ``x``. Not called by the user.

        :param x: array type
        """
        self.io.debug("Initiating " + self.name)

        binning_missing = self.n_bins is None
        if binning_missing:
            n_entries = len(x)
            self.set_n_bins(n_entries)

        # Range of the fitted data, used later by set_limits().
        self.max = np.max(x)
        self.min = np.min(x)


    def _initialise(self, x): …


    def fit(self, x, y=None):
        """
        Fits the transformation to a distribution.

        Runs the common initialisation, then the ``_fit()`` hook of the
        concrete class, and finally flags the transformation as processed.

        :param x:   Distribution to fit, array type
        :param y:   optional for some transformations, sets signal class
        """
        # Step 1: binning and data range.
        self._initialise(x)

        # Step 2: the actual fit, provided by the sub class.
        self._fit(x, y)

        # Step 3: status flag.
        self.is_processed = True


    def fit(self, x, y=None): …


    def __call__(self, x):
        """ Makes the object callable; forwards to ``transform()``.

        :param x:   Input data
        :return:    Transformed data
        """
        transformed = self.transform(x)
        return transformed


    def __call__(self, x): …


    def _fit(self, x, y=None):
        """
        Fit hook, overwritten by the sub classes.

        The base class implementation does nothing.

        :param x:   array x values
        :param y:   class variable [1,0]
        """
        return None


    def _fit(self, x, y=None): …


    def transform(self, x, set_limits=False):
        """
        Transforms the data with the ``_transform()`` hook of the concrete class.

        :param x:           Distribution to transform, array type
        :param set_limits:  Limits the range of the data to the fitted range
                            before it is transformed
        :return:            Transformed data
        """
        if set_limits:
            # set_limits() hands the limited data back. A scalar cannot be
            # modified in place, so the returned value has to be used here;
            # otherwise the limits are silently ignored for scalar input.
            x = self.set_limits(x)

        return self._transform(x)


    def transform(self, x, set_limits=False): …


    def _transform(self, x):
        """
        Transformation hook, overwritten by the sub classes.

        The base class is the identity: the input is handed back unchanged.

        :param x:   Distribution to transform, array type
        :return:    Transformed data
        """
        unchanged = x
        return unchanged


    def _transform(self, x): …


    def set_n_bins(self, n):
        """
        Calculates the optimal size for the binning and stores it in ``n_bins``.

        :param n:   Length of the input data
        """
        self.n_bins = get_optimal_bin_size(n)

        # The average is only needed for the debug message. It is guarded so
        # that zero bins cannot make a log statement raise ZeroDivisionError.
        per_bin = n / float(self.n_bins) if self.n_bins else 0.0

        self.io.debug("Bins are set to " + str(self.n_bins) + "\t " + str(per_bin) + " per bin")


    def set_n_bins(self, n): …


    def set_limits(self, x):
        """
        Limits the data to the fitted range between ``self.min`` and ``self.max``.

        Input with a length that supports boolean-mask assignment is limited in
        place and the same object is returned. Plain lists and tuples do not
        support that, so they are copied into a new float numpy array first.
        Input without a length is treated as a single value.

        :param x:   Input data
        :return:    Limited data
        """
        try:
            # Only the length probe is guarded, so that a TypeError raised by
            # the masking below is not mistaken for "x is a scalar".
            len(x)
        except TypeError:
            # Single value: apply the lower limit first, then the upper one.
            if x < self.min:
                x = self.min
            if x > self.max:
                x = self.max
            return x

        if isinstance(x, (list, tuple)):
            # float dtype so that non-integer limits are not truncated
            x = np.array(x, dtype=float)

        x[x > self.max] = self.max
        x[x < self.min] = self.min
        return x


    def set_limits(self, x): …

class Transform(ProTool): …

def get_optimal_bin_size(n):
    """
    Computes the number of bins used for ``n`` events.

    The result is three times the cube root of ``n``, truncated to an integer.

    :param      n:  number of Events
    :return:        optimal bin size
    """
    cube_root = n ** (1 / 3.0)
    scaled = 3 * cube_root
    return int(scaled)


def get_average_in_bins(n):
    """
    Computes the expected number of entries per bin.

    :param n:   Length of the data
    :return:    Length of the data divided by the optimal bin size
    """
    n_bins = get_optimal_bin_size(n)
    return n / float(n_bins)


class CDF(Transform):


    """

    Calculates the cumulative distribution (CDF)

    Can be used for the flat transformation.


    Attributes

    ----------

    spline : InterpolatedUnivariateSpline

        Spline, fitting the CDF


    """


    def __init__(self, *args):
        """ Set up an unfitted CDF transformation.

        :param args: Positional arguments handed on to ``Transform.__init__``
                     after the fixed name ``"CDF"``
        """
        Transform.__init__(self, "CDF", *args)

        # No spline exists until _fit() has been run.
        self.spline = None


    def __init__(self, *args): …


    def _fit(self, x, y=None):
        """
        Fit function calculates the cumulative distribution with numpy percentile.

        The percentiles of ``x`` are evaluated at ``2 * n_bins`` equally spaced
        levels between 0 and 100 and a spline is fitted through the resulting
        (value, level) pairs. Runs of identical percentile values are spread
        out towards the next distinct value first, because the interpolating
        spline requires strictly increasing x positions.

        :param x:   Input distribution
        :param y:   Will not be used in this transformation
        """
        self.io.debug("Fitting CDF")

        y_ = np.linspace(0, 100, 2 * self.n_bins)
        x_ = pd.Series(np.percentile(x, list(y_)))

        # Count same values, ordered by the value itself
        vc = x_.value_counts()
        vc = vc.sort_index()

        # replace same values
        for i, xi in enumerate(vc):
            if xi > 1:
                try:
                    nex_val = vc.index[i + 1]
                except IndexError:
                    # Largest value: there is no next one, use a small offset.
                    nex_val = vc.index[i] + 0.01

                # endpoint=False keeps every fill value below nex_val. With
                # the end point included, the last fill value would equal the
                # next distinct percentile and create a new duplicate.
                fill = np.linspace(vc.index[i], nex_val, xi, endpoint=False)
                x_[x_ == vc.index[i]] = fill

        self.spline = InterpolatedUnivariateSpline(x_, y_)


    def _fit(self, x, y=None): …


    def _transform(self, x):
        """
        Evaluates the fitted CDF spline on the input data.

        The data is limited to the fitted range with ``set_limits()`` before
        the spline is evaluated.

        :param x:   Input data
        :return:    Transformed data
        """
        limited = self.set_limits(x)
        return self.spline(limited)


    def _transform(self, x): …

class CDF(Transform): …


class ToFlat(Transform):


    """

    This transformation uses the CDF to transform input data to a

    flat transformation.


    Attributes

    ----------

    cdf : Transform.CDF

        Transformation with the CDF


    """


    def __init__(self, *args):
        """ Set up an unfitted flat transformation.

        :param args: Positional arguments handed on to ``Transform.__init__``
                     after the fixed name ``"Flat"`` and to the wrapped ``CDF``
        """
        Transform.__init__(self, "Flat", *args)

        # The CDF transformation that does the actual work.
        self.cdf = CDF(*args)


    def __init__(self, *args): …


    def _fit(self, x, y=None):
        """
        Fits the wrapped CDF transformation to the input distribution.

        :param x:   Input distribution
        :param y:   Will not be used in this transformation
        """
        self.io.debug("Fitting Flat")

        wrapped_cdf = self.cdf
        wrapped_cdf.fit(x)


    def _fit(self, x, y=None): …


    def _transform(self, x):
        """
        Transforms the input data with the wrapped CDF transformation.

        If the transformation has not been fitted yet, it is fitted on the
        given data first.

        :param x:   Input data
        :return:    Transformed data
        """
        needs_fit = not self.is_processed
        if needs_fit:
            self.fit(x)

        flat = self.cdf.transform(x)
        return flat


    def _transform(self, x): …


    def get_flat_bins(self):
        """
        Returns the ``x`` attribute of the wrapped CDF transformation.

        :return: Binning for a flat distribution
        """
        # NOTE(review): none of the code visible here assigns an ``x``
        # attribute on CDF. Confirm it is provided elsewhere (e.g. by the
        # base class), otherwise this lookup raises AttributeError.
        wrapped_cdf = self.cdf
        return wrapped_cdf.x


    def get_flat_bins(self): …


    def get_x(self, x_flat):
        """
        Dirty version for getting the original x value out of a flat x value.

        Scans ``n_bins * 50`` equally spaced points between the fitted minimum
        and maximum and returns the first one whose CDF spline value is larger
        than ``x_flat``.

        :param x_flat:  x value in the flat distribution
        :return:        x value on the original axis (approx), or ``None`` if
                        no scanned point has a CDF value above ``x_flat``
        """
        x_cumul = np.linspace(self.min, self.max, self.n_bins * 50)

        # One vectorised spline evaluation instead of one call per grid point.
        above = np.flatnonzero(self.cdf.spline(x_cumul) > x_flat)
        if above.size == 0:
            return None

        return x_cumul[above[0]]

    def get_x(self, x_flat): …

class ToFlat(Transform): …

alignment.fancystuff.settings.ProTool
Definition settings.py:26

alignment.fancystuff.settings.ProTool.io
io(self)
Definition settings.py:49

alignment.fancystuff.settings.ProTool.name
name
Name of the class.
Definition settings.py:46

alignment.fancystuff.transform.CDF
Definition transform.py:176

alignment.fancystuff.transform.CDF._fit
_fit(self, x, y=None)
Definition transform.py:199

alignment.fancystuff.transform.CDF.__init__
__init__(self, *args)
Definition transform.py:189

alignment.fancystuff.transform.CDF._transform
_transform(self, x)
Definition transform.py:225

alignment.fancystuff.transform.CDF.spline
spline
Spline, fitting the CDF.
Definition transform.py:197

alignment.fancystuff.transform.ToFlat
Definition transform.py:235

alignment.fancystuff.transform.ToFlat.cdf
cdf
Transformation with the CDF.
Definition transform.py:256

alignment.fancystuff.transform.ToFlat.get_x
get_x(self, x_flat)
Definition transform.py:285

alignment.fancystuff.transform.ToFlat._fit
_fit(self, x, y=None)
Definition transform.py:258

alignment.fancystuff.transform.ToFlat.__init__
__init__(self, *args)
Definition transform.py:248

alignment.fancystuff.transform.ToFlat._transform
_transform(self, x)
Definition transform.py:268

alignment.fancystuff.transform.ToFlat.get_flat_bins
get_flat_bins(self)
Definition transform.py:278

alignment.fancystuff.transform.Transform
Definition transform.py:27

alignment.fancystuff.transform.Transform.transform
transform(self, x, set_limits=False)
Definition transform.py:110

alignment.fancystuff.transform.Transform.n_bins
n_bins
Binning in x, will be set automatically.
Definition transform.py:55

alignment.fancystuff.transform.Transform.set_limits
set_limits(self, x)
Definition transform.py:139

alignment.fancystuff.transform.Transform.min
int min
Minimum of the fitted distribution.
Definition transform.py:61

alignment.fancystuff.transform.Transform.__init__
__init__(self, name="Original", n_bins=None)
Definition transform.py:48

alignment.fancystuff.transform.Transform.set_n_bins
set_n_bins(self, n)
Definition transform.py:131

alignment.fancystuff.transform.Transform._fit
_fit(self, x, y=None)
Definition transform.py:102

alignment.fancystuff.transform.Transform._transform
_transform(self, x)
Definition transform.py:121

alignment.fancystuff.transform.Transform.fit
fit(self, x, y=None)
Definition transform.py:84

alignment.fancystuff.transform.Transform.is_processed
bool is_processed
Status flag.
Definition transform.py:64

alignment.fancystuff.transform.Transform._initialise
_initialise(self, x)
Definition transform.py:71

alignment.fancystuff.transform.Transform.__call__
__call__(self, x)
Definition transform.py:95

alignment.fancystuff.transform.Transform.max
int max
Maximum of the fitted distribution.
Definition transform.py:58

alignment.fancystuff.settings
Definition settings.py:1