# Source code for statinf.data.ProcessData

import numpy as np
import pandas as pd
import re
import warnings
from types import SimpleNamespace


# Ranking data

def rankdata(x):
    """Assigns ranks to data. This is mainly used for analyses like Spearman's correlation.

    :param x: Input vector. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x: :obj:`numpy.array`

    :example:

    >>> rankdata([2., 5.44, 3.93, 3.3, 1.1])
    ... array([1, 4, 3, 2, 0])

    :return: Vector with ranked values.
    :rtype: :obj:`numpy.array`
    """
    x_arr = np.asarray(x)
    sorted_array = sorted(x_arr)
    rk = [sorted_array.index(i) for i in x_arr]
    return np.array(rk)
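
# A minimal usage sketch (hypothetical values): since Spearman's rho is
# Pearson's correlation computed on ranks, rankdata can feed numpy's corrcoef
# directly. Note that this simple ranking does not average tied values.
#
# >>> x = [2., 5.44, 3.93, 3.3, 1.1]
# >>> y = [1.2, 3.4, 2.9, 2.2, 0.4]
# >>> rho = np.corrcoef(rankdata(x), rankdata(y))[0, 1]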

#######################################################################################################################
# Parse formula and data transformations

def parse_formula(formula, data, check_values=True, return_all=False):
    """This function is used in regression models in order to apply transformations on the data from a formula.
    It applies transformations given by a :obj:`str` formula. See below for examples.

    :param formula: Regression formula to be run of the form :obj:`y ~ x1 + x2`. Accepted functions are:

        * :math:`\\log(x)` \\: :obj:`log(X)`
        * :math:`\\exp(x)` \\: :obj:`exp(X)`
        * :math:`\\sqrt{x}` \\: :obj:`sqrt(X)`
        * :math:`\\cos(x)` \\: :obj:`cos(X)`
        * :math:`\\sin(x)` \\: :obj:`sin(X)`
        * :math:`x^{z}` \\: :obj:`X ** Z`
        * :math:`\\dfrac{x}{z}` \\: :obj:`X/Z`
        * :math:`x \\times z` \\: :obj:`X*Z`
    :type formula: :obj:`str`
    :param data: Data on which to perform the transformations.
    :type data: :obj:`pandas.DataFrame`
    :param check_values: For each transformation, check whether the data range satisfies the domain of definition of the function, defaults to True.
    :type check_values: :obj:`bool`, optional
    :param return_all: Returns the transformed data, column :obj:`Y` and columns :obj:`X`, defaults to False.
    :type return_all: :obj:`bool`, optional

    :example:

    >>> from statinf.data import parse_formula
    >>> print(input_df)
    ... +-----------+-----------+-----------+
    ... |    X1     |    X2     |     Y     |
    ... +-----------+-----------+-----------+
    ... |  0.555096 |  0.681083 | -1.383428 |
    ... |  1.155661 |  0.391129 | -7.780989 |
    ... | -0.299251 | -0.445602 | -8.146673 |
    ... | -0.978311 |  1.312146 |  8.653818 |
    ... | -0.225917 |  0.522016 | -9.684332 |
    ... +-----------+-----------+-----------+
    >>> form = 'Y ~ X1 + X2 + exp(X2) + X1*X2'
    >>> new_df = parse_formula(form, data=input_df)
    >>> print(new_df)
    ... +-----------+-----------+-----------+-----------+-----------+
    ... |    X1     |    X2     |     Y     |  exp(X2)  |   X1*X2   |
    ... +-----------+-----------+-----------+-----------+-----------+
    ... |  0.555096 |  0.681083 | -1.383428 |  1.976017 |  0.378066 |
    ... |  1.155661 |  0.391129 | -7.780989 |  1.478649 |  0.452012 |
    ... | -0.299251 | -0.445602 | -8.146673 |  0.640438 |  0.133347 |
    ... | -0.978311 |  1.312146 |  8.653818 |  3.714134 | -1.283687 |
    ... | -0.225917 |  0.522016 | -9.684332 |  1.685422 | -0.117932 |
    ... +-----------+-----------+-----------+-----------+-----------+

    :raises ValueError: Raised when the data cannot satisfy the domain of definition of the required transformation.

    :return: Transformed data set.
    :rtype: :obj:`pandas.DataFrame`
    """
    warnings.filterwarnings('ignore')
    # Parse formula
    no_space_formula = formula.replace(' ', '')
    Y_col = no_space_formula.split('~')[0]
    X_col = no_space_formula.split('~')[1].split('+')
    # Non-linear transformations
    log_cols = [re.search('(?<=log\\().*?(?=\\))', x).group(0) for x in X_col if re.findall('log\\(', x)]     # log
    exp_cols = [re.search('(?<=exp\\().*?(?=\\))', x).group(0) for x in X_col if re.findall('exp\\(', x)]     # exp
    sqrt_cols = [re.search('(?<=sqrt\\().*?(?=\\))', x).group(0) for x in X_col if re.findall('sqrt\\(', x)]  # sqrt
    cos_cols = [re.search('(?<=cos\\().*?(?=\\))', x).group(0) for x in X_col if re.findall('cos\\(', x)]     # cos
    sin_cols = [re.search('(?<=sin\\().*?(?=\\))', x).group(0) for x in X_col if re.findall('sin\\(', x)]     # sin
    # Transformation functions
    transformations_functional = {'log': {'func': np.log, 'cols': log_cols},
                                  'exp': {'func': np.exp, 'cols': exp_cols},
                                  'cos': {'func': np.cos, 'cols': cos_cols},
                                  'sin': {'func': np.sin, 'cols': sin_cols},
                                  'sqrt': {'func': np.sqrt, 'cols': sqrt_cols},
                                  }
    # Apply transformations
    for key, transformation in transformations_functional.items():
        for col_to_transform in transformation['cols']:
            # Check that the values satisfy the domain of definition of the function
            if check_values:
                if key == 'log' and (data[col_to_transform] <= 0.).any():
                    raise ValueError(f'Column {col_to_transform} contains non-positive values, log cannot be applied.')
                if key == 'sqrt' and (data[col_to_transform] < 0.).any():
                    raise ValueError(f'Column {col_to_transform} contains negative values, sqrt cannot be applied.')
            # Transform
            data.loc[:, f'{key}({col_to_transform})'] = transformation['func'](data[col_to_transform])
    # Multiplication, power and ratio functions
    pow_cols = [x for x in X_col if re.findall('[a-zA-Z0-9\\(\\)][*][*][a-zA-Z0-9]', x)]   # X1 ** Z
    inter_cols = [x for x in X_col if re.findall('[a-zA-Z0-9\\(\\)][*][a-zA-Z0-9]', x)]    # X1 * X2
    div_cols = [x for x in X_col if re.findall('[a-zA-Z0-9\\(\\)][/][a-zA-Z0-9]', x)]      # X1 / X2
    # Exponents
    for c in pow_cols:
        c_left = c.split('**')[0]
        c_power = c.split('**')[1]
        # Get components as number or column from data
        left = data[c_left].values if c_left in data.columns else float(c_left)
        power = data[c_power].values if c_power in data.columns else float(c_power)
        # Transform
        data.loc[:, c] = left ** power
    # Multiplications
    for c in inter_cols:
        c_left = c.split('*')[0]
        c_right = c.split('*')[1]
        # Get components as number or column from data
        try:
            left = data[c_left].values if c_left in list(data.columns) + X_col else float(c_left)
            right = data[c_right].values if c_right in list(data.columns) + X_col else float(c_right)
        except Exception:
            raise ValueError(f'Columns {c_left} or {c_right} not found in data.')
        # Transform
        data.loc[:, c] = left * right
    # Divisions
    for c in div_cols:
        c_num = c.split('/')[0]
        c_denom = c.split('/')[1]
        # Get components as number or column from data
        num = data[c_num].values if c_num in list(data.columns) + X_col else float(c_num)
        denom = data[c_denom].values if c_denom in list(data.columns) + X_col else float(c_denom)
        if check_values and (denom == 0.).sum() > 0:
            raise ValueError(f'Column {c_denom} contains zero values, division cannot be applied.')
        # Transform
        data.loc[:, c] = num / denom
    if '1' in X_col:
        data['1'] = 1
    # Putting pandas' warning messages back
    warnings.filterwarnings('default')
    if return_all:
        return data, X_col, Y_col
    else:
        return data
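
# A minimal sketch of return_all usage (input_df is the hypothetical frame
# from the docstring above): the parsed column lists can then be passed to a
# downstream regression model.
#
# >>> form = 'Y ~ X1 + X2 + exp(X2) + X1*X2'
# >>> new_df, x_cols, y_col = parse_formula(form, data=input_df, return_all=True)
# >>> x_cols
# ... ['X1', 'X2', 'exp(X2)', 'X1*X2']
# >>> y_col
# ... 'Y'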

#######################################################################################################################
# Adding One Hot Encoding

def OneHotEncoding(data, columns, drop=True, verbose=False):
    """Performs One Hot Encoding (OHE), usually used in Machine Learning.

    :param data: Data Frame on which we apply One Hot Encoding.
    :type data: :obj:`pandas.DataFrame`
    :param columns: Columns to be converted to dummy variables.
    :type columns: :obj:`list`
    :param drop: Drop the column for one attribute (first value that appears in the dataset). This helps avoid multicollinearity issues in subsequent models, defaults to True.
    :type drop: :obj:`bool`, optional
    :param verbose: Display progression, defaults to False.
    :type verbose: :obj:`bool`, optional

    :example:

    >>> from statinf.data import OneHotEncoding
    >>> print(df)
    ... +----+--------+----------+-----+
    ... | Id | Gender | Category | Age |
    ... +----+--------+----------+-----+
    ... |  1 | Male   | A        |  23 |
    ... |  2 | Female | B        |  21 |
    ... |  3 | Female | A        |  31 |
    ... |  4 | Male   | C        |  22 |
    ... |  5 | Female | A        |  26 |
    ... +----+--------+----------+-----+
    >>> # Encoding columns "Gender" and "Category"
    >>> new_df = OneHotEncoding(df, columns=["Gender", "Category"])
    >>> print(new_df)
    ... +----+---------------+------------+------------+-----+
    ... | Id | Gender_Female | Category_B | Category_C | Age |
    ... +----+---------------+------------+------------+-----+
    ... |  1 |             0 |          0 |          0 |  23 |
    ... |  2 |             1 |          1 |          0 |  21 |
    ... |  3 |             1 |          0 |          0 |  31 |
    ... |  4 |             0 |          0 |          1 |  22 |
    ... |  5 |             1 |          0 |          0 |  26 |
    ... +----+---------------+------------+------------+-----+
    >>> # Listing the newly created columns
    >>> print(new_df.meta._ohe)
    ... {'Gender': ['Gender_Female'],
    ...  'Category': ['Category_B', 'Category_C']}
    >>> # Get the aggregated list of encoded columns
    >>> print(new_df.meta._ohe_all_columns)
    ... ['Gender_Female', 'Category_B', 'Category_C']

    :return: Transformed data with One Hot Encoded variables.
        New attributes are added to the data frame:

        * :obj:`df.meta._ohe`: contains the encoded columns and the created columns.
        * :obj:`df.meta._ohe_all_columns`: aggregates the newly created columns in one list. This list can directly be passed or appended to the input columns argument of subsequent models.
    :rtype: :obj:`pandas.DataFrame`
    """
    dataset = data.copy()
    try:
        # Reading the attribute raises AttributeError when no OHE metadata exists yet
        if dataset.meta._ohe_exists:
            dataset.meta._ohe_exists = True
    except AttributeError:
        # No OHE metadata yet: initialize the namespace
        dataset.meta = SimpleNamespace()
        dataset.meta._ohe_exists = True
        dataset.meta._ohe = {}
        dataset.meta._ohe_all_columns = []

    cols = [columns] if isinstance(columns, str) else columns

    # Start encoding column by column
    for column in cols:
        # Get all values from the column
        all_values = dataset[column].unique()
        all_values = all_values[1:] if drop else all_values
        new_cols = [f'{column}_{val}' for val in all_values]
        # Add column metadata
        dataset.meta._ohe.update({column: new_cols})
        dataset.meta._ohe_all_columns += new_cols
        # Encode values
        for val in all_values:
            if verbose:
                print('Encoding for value: ' + str(val))
            colname = column + '_' + str(val)
            dataset.loc[:, colname] = 0
            dataset.loc[dataset[column] == val, colname] = 1
        # Drop the original categorical column
        dataset.drop(columns=[column], inplace=True)
    return dataset
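
# A minimal sketch (hypothetical column names, continuing the docstring
# example): the aggregated list of encoded columns can be combined with
# numeric features to build a model's input column list.
#
# >>> new_df = OneHotEncoding(df, columns=["Gender", "Category"])
# >>> features = ['Age'] + new_df.meta._ohe_all_columns
# >>> features
# ... ['Age', 'Gender_Female', 'Category_B', 'Category_C']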

#######################################################################################################################
# Convert an array of values into a dataset matrix: used for LSTM data pre-processing

def create_dataset(data, n_in=1, n_out=1, dropnan=True):
    """Converts a DataFrame into a multivariate time series format readable by Keras LSTM.

    :param data: DataFrame on which to apply the transformation.
    :type data: :obj:`pandas.DataFrame`
    :param n_in: Input dimension, also known as look back or size of the window, defaults to 1.
    :type n_in: :obj:`int`, optional
    :param n_out: Output dimension, defaults to 1.
    :type n_out: :obj:`int`, optional
    :param dropnan: Remove empty values in the series, defaults to True.
    :type dropnan: :obj:`bool`, optional

    :return: Features converted for Keras LSTM.
    :rtype: :obj:`pandas.DataFrame`
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # Input sequence (t-n, ..., t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # Forecast sequence (t, t+1, ..., t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
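
# A minimal shape sketch (toy data): with n_in=2 and n_out=1 on a 5 x 2
# frame, the first two rows are dropped (NaN lags), leaving 3 rows and
# n_vars * (n_in + n_out) = 6 columns.
#
# >>> toy = pd.DataFrame({'a': range(5), 'b': range(5, 10)})
# >>> supervised = create_dataset(toy, n_in=2, n_out=1)
# >>> supervised.shape
# ... (3, 6)
# >>> list(supervised.columns)
# ... ['var1(t-2)', 'var2(t-2)', 'var1(t-1)', 'var2(t-1)', 'var1(t)', 'var2(t)']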

def split_sequences(data, look_back=1):
    """Splits a multivariate time series from :py:meth:`statinf.data.ProcessData.multivariate_time_series` into a Keras-friendly format for LSTM.

    :param data: Data in the format of sequences to transform.
    :type data: :obj:`numpy.ndarray`
    :param look_back: Size of the trailing window, number of time steps to consider, defaults to 1.
    :type look_back: :obj:`int`

    :example:

    >>> from statinf.data import multivariate_time_series, split_sequences
    >>> train_to_split = multivariate_time_series(train)
    >>> X, y = split_sequences(train_to_split, look_back=7)

    :return:
        * :obj:`x`: Input data converted for Keras LSTM.
        * :obj:`y`: Target series converted for Keras LSTM.
    :rtype:
        * :obj:`numpy.ndarray`
        * :obj:`numpy.ndarray`
    """
    X, y = list(), list()
    for i in range(len(data)):
        # Find the end of this pattern
        end_ix = i + look_back
        # Check if we are beyond the dataset
        if end_ix > len(data) - 1:
            break
        # Gather input and output parts of the pattern
        seq_x, seq_y = data[i:end_ix, :], data[end_ix, :]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

def multivariate_time_series(data):
    """Converts a data frame into a numpy array multivariate time series.

    :param data: Input data to transform.
    :type data: :obj:`pandas.DataFrame`

    :example:

    >>> from statinf.data import multivariate_time_series, split_sequences
    >>> train_to_split = multivariate_time_series(train)
    >>> X, y = split_sequences(train_to_split, look_back=7)

    :return: Transformed multivariate time series data.
    :rtype: :obj:`numpy.ndarray`
    """
    to_stack = ()
    for _c in data.columns:
        series = data[_c].to_numpy()
        to_stack += (series.reshape((len(series), 1)), )
    return np.hstack(to_stack)
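
# A minimal shape sketch (hypothetical train frame): for a data frame with
# n rows and k columns, multivariate_time_series returns an (n, k) array,
# and split_sequences with look_back=L yields n - L samples:
# X has shape (n - L, L, k) and y has shape (n - L, k).
#
# >>> train_to_split = multivariate_time_series(train)   # train: 10 rows, 3 columns
# >>> X, y = split_sequences(train_to_split, look_back=3)
# >>> X.shape, y.shape
# ... ((7, 3, 3), (7, 3))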

#######################################################################################################################
# Scale dataset

class Scaler:
    def __init__(self, data, columns):
        """Data scaler.

        :param data: Data set to scale.
        :type data: :obj:`pandas.DataFrame`
        :param columns: Columns to scale.
        :type columns: :obj:`list`

        :example:

        >>> from statinf.data import Scaler, generate_dataset
        >>> coeffs = [1.2556, -0.465, 1.665414, 2.5444, -7.56445]
        >>> data = generate_dataset(coeffs, n=10, std_dev=2.6)
        >>> # Original dataset
        >>> print(data)
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        ... |    X0     |    X1     |    X2     |    X3     |    X4     |     Y     |
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        ... |  0.977594 |  1.669510 | -1.385569 |  0.696975 | -1.207098 |  8.501692 |
        ... | -0.953802 |  1.025392 | -0.639291 |  0.658251 |  0.746814 | -7.186085 |
        ... | -0.148140 | -0.972473 |  0.843746 |  1.306845 |  0.269834 |  1.939924 |
        ... |  0.499385 | -1.081926 |  2.646441 |  0.910503 |  0.857189 |  0.389257 |
        ... | -0.563977 | -0.511933 | -0.726744 | -0.630345 | -0.486822 | -0.125787 |
        ... | -0.434994 | -0.396210 |  1.101739 | -0.660236 | -1.197566 |  7.735832 |
        ... |  0.032478 | -0.114952 | -0.097337 |  1.794769 |  1.239423 | -5.510332 |
        ... |  0.085569 | -0.600019 |  0.224186 |  0.301771 |  1.278387 | -8.648084 |
        ... | -0.028844 | -0.329940 | -0.301762 |  0.946077 | -0.359133 |  5.099971 |
        ... | -0.665312 |  0.270254 | -1.263288 |  0.545625 |  0.499162 | -6.126528 |
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        >>> # Load scaler class
        >>> scaler = Scaler(data=data, columns=['X1', 'X2'])
        >>> # Scale our dataset with the MinMax method
        >>> scaled_df = scaler.MinMax()
        >>> print(scaled_df)
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        ... |    X0     |    X1     |    X2     |    X3     |    X4     |     Y     |
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        ... |  0.977594 |  1.000000 |  0.000000 |  0.696975 | -1.207098 |  8.501692 |
        ... | -0.953802 |  0.765898 |  0.185088 |  0.658251 |  0.746814 | -7.186085 |
        ... | -0.148140 |  0.039781 |  0.552904 |  1.306845 |  0.269834 |  1.939924 |
        ... |  0.499385 |  0.000000 |  1.000000 |  0.910503 |  0.857189 |  0.389257 |
        ... | -0.563977 |  0.207162 |  0.163399 | -0.630345 | -0.486822 | -0.125787 |
        ... | -0.434994 |  0.249221 |  0.616890 | -0.660236 | -1.197566 |  7.735832 |
        ... |  0.032478 |  0.351444 |  0.319501 |  1.794769 |  1.239423 | -5.510332 |
        ... |  0.085569 |  0.175148 |  0.399244 |  0.301771 |  1.278387 | -8.648084 |
        ... | -0.028844 |  0.273307 |  0.268801 |  0.946077 | -0.359133 |  5.099971 |
        ... | -0.665312 |  0.491445 |  0.030328 |  0.545625 |  0.499162 | -6.126528 |
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        >>> # Unscale the new dataset to retrieve the previous data scale
        >>> unscaled_df = scaler.unscaleMinMax(scaled_df)
        >>> print(unscaled_df)
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        ... |    X0     |    X1     |    X2     |    X3     |    X4     |     Y     |
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        ... |  0.977594 |  1.669510 | -1.385569 |  0.696975 | -1.207098 |  8.501692 |
        ... | -0.953802 |  1.025392 | -0.639291 |  0.658251 |  0.746814 | -7.186085 |
        ... | -0.148140 | -0.972473 |  0.843746 |  1.306845 |  0.269834 |  1.939924 |
        ... |  0.499385 | -1.081926 |  2.646441 |  0.910503 |  0.857189 |  0.389257 |
        ... | -0.563977 | -0.511933 | -0.726744 | -0.630345 | -0.486822 | -0.125787 |
        ... | -0.434994 | -0.396210 |  1.101739 | -0.660236 | -1.197566 |  7.735832 |
        ... |  0.032478 | -0.114952 | -0.097337 |  1.794769 |  1.239423 | -5.510332 |
        ... |  0.085569 | -0.600019 |  0.224186 |  0.301771 |  1.278387 | -8.648084 |
        ... | -0.028844 | -0.329940 | -0.301762 |  0.946077 | -0.359133 |  5.099971 |
        ... | -0.665312 |  0.270254 | -1.263288 |  0.545625 |  0.499162 | -6.126528 |
        ... +-----------+-----------+-----------+-----------+-----------+-----------+
        """
        self.data = data.copy()
        self.scalers = {}
        self.columns = list(columns)
        for c in columns:
            _min = self.data[c].min()
            _max = self.data[c].max()
            _mean = self.data[c].mean()
            _std = self.data[c].std()
            _scale_temp = {'min': float(_min),
                           'max': float(_max),
                           'mean': float(_mean),
                           'std': float(_std),
                           }
            self.scalers.update({c: _scale_temp})

    def _col_to_list(self, columns):
        """Transforms column names to be scaled into a list.

        :param columns: Column names to be scaled.
        :type columns: :obj:`list` or :obj:`str`

        :return: Column name(s) as a list.
        :rtype: :obj:`list`
        """
        if columns is None:
            cols = self.columns
        elif isinstance(columns, str):
            cols = [columns]
        else:
            cols = columns
        return cols
    def MinMax(self, data=None, columns=None, feature_range=(0, 1), col_suffix=''):
        """Min-max scaler. Scaled data will range between the bounds given in :obj:`feature_range`, 0 and 1 by default.

        :param data: Data set to scale, defaults to None, takes data provided in :py:meth:`__init__`.
        :type data: :obj:`pandas.DataFrame`, optional
        :param columns: Columns to be scaled, defaults to None, takes the list provided in :py:meth:`__init__`.
        :type columns: :obj:`list`, optional
        :param feature_range: Expected value range of the scaled data, defaults to (0, 1).
        :type feature_range: :obj:`tuple`, optional
        :param col_suffix: Suffix to add to column names, defaults to '', overrides the existing columns.
        :type col_suffix: :obj:`str`, optional

        :formula: .. math:: x_{\\text{scaled}} = \\dfrac{x - \\min(x)}{\\max(x) - \\min(x)} \\cdot (f\\_max - f\\_min) + f\\_min

            where :math:`(f\\_min, f\\_max)` defaults to :math:`(0, 1)` and corresponds to the expected data range of the scaled data from argument :obj:`feature_range`.

        :return: Data set with scaled features.
        :rtype: :obj:`pandas.DataFrame`
        """
        self._minmax_suffix = col_suffix
        self._minmax_feature_range = feature_range
        f_min, f_max = self._minmax_feature_range
        cols = self._col_to_list(columns)
        df = self.data if data is None else data.copy()
        for c in cols:
            # Retrieve min and max
            _min = self.scalers[c]['min']
            _max = self.scalers[c]['max']
            tmp = (df[c] - _min) / (_max - _min)
            df[c + col_suffix] = tmp * (f_max - f_min) + f_min
        return df
    def unscaleMinMax(self, data=None, columns=None, columns_mapping={}):
        """Unscales from min-max. Retrieves data in the same range as the original features.

        :param data: Data set to unscale, defaults to None, takes data provided in :py:meth:`__init__`.
        :type data: :obj:`pandas.DataFrame`, optional
        :param columns: Columns to be unscaled, defaults to None, takes the list provided in :py:meth:`__init__`.
        :type columns: :obj:`list`, optional
        :param columns_mapping: Mapping between eventual renamed columns and original scaled columns.
        :type columns_mapping: :obj:`dict`, optional

        :formula: .. math:: x_{\\text{unscaled}} = \\dfrac{x_{\\text{scaled}} - f\\_min}{f\\_max - f\\_min} \\cdot \\left( \\max(x) - \\min(x) \\right) + \\min(x)

            where :math:`(f\\_min, f\\_max)` is the :obj:`feature_range` used in :py:meth:`MinMax`.

        :return: Unscaled data set.
        :rtype: :obj:`pandas.DataFrame`
        """
        cols = self._col_to_list(columns)
        df = self.data if data is None else data.copy()
        unscale_suffix = '_unscaled' if self._minmax_suffix != '' else ''
        f_min, f_max = self._minmax_feature_range
        for c in cols:
            # Apply eventual column name mapping
            _c = c if columns_mapping.get(c) is None else columns_mapping.get(c)
            # Retrieve min and max
            _min = self.scalers[_c]['min']
            _max = self.scalers[_c]['max']
            tmp = (df[c + self._minmax_suffix] - f_min) / (f_max - f_min)
            df[c + unscale_suffix] = (tmp * (_max - _min)) + _min
        return df
    def Normalize(self, center=True, reduce=True, data=None, columns=None, col_suffix=''):
        """Data normalizer. Centers and reduces features (using the mean and standard deviation).

        :param center: Center the variable, i.e. subtract the mean, defaults to True.
        :type center: :obj:`bool`, optional
        :param reduce: Reduce the variable, i.e. divide by the standard deviation, defaults to True.
        :type reduce: :obj:`bool`, optional
        :param data: Data set to normalize, defaults to None, takes data provided in :py:meth:`__init__`.
        :type data: :obj:`pandas.DataFrame`, optional
        :param columns: Columns to be normalized, defaults to None, takes the list provided in :py:meth:`__init__`.
        :type columns: :obj:`list`, optional
        :param col_suffix: Suffix to add to column names, defaults to '', overrides the existing columns.
        :type col_suffix: :obj:`str`, optional

        :formula: .. math:: x_{\\text{scaled}} = \\dfrac{x - \\bar{x}}{\\sqrt{\\mathbb{V}(x)}}

        :return: Data set with normalized features.
        :rtype: :obj:`pandas.DataFrame`
        """
        self._standard_suffix = col_suffix
        cols = self._col_to_list(columns)
        df = self.data if data is None else data.copy()
        for c in cols:
            # Retrieve mean
            if center:
                _mean = self.scalers[c]['mean']
                self.scalers[c].update({'center': True})
            else:
                _mean = 0.
                self.scalers[c].update({'center': False})
            # Retrieve std
            if reduce:
                _std = self.scalers[c]['std']
                self.scalers[c].update({'reduce': True})
            else:
                _std = 1.
                self.scalers[c].update({'reduce': False})
            df[c + col_suffix] = (df[c] - _mean) / _std
        return df
    def unscaleNormalize(self, data=None, columns=None, columns_mapping={}):
        """Denormalizes data to retrieve the same range as the original data set.

        :param data: Data set to unscale, defaults to None, takes data provided in :py:meth:`__init__`.
        :type data: :obj:`pandas.DataFrame`, optional
        :param columns: Columns to be unscaled, defaults to None, takes the list provided in :py:meth:`__init__`.
        :type columns: :obj:`list`, optional
        :param columns_mapping: Mapping between eventual renamed columns and original scaled columns.
        :type columns_mapping: :obj:`dict`, optional

        :formula: .. math:: x_{\\text{unscaled}} = x_{\\text{scaled}} \\cdot \\sqrt{\\mathbb{V}(x)} + \\bar{x}

        :return: De-normalized data set.
        :rtype: :obj:`pandas.DataFrame`
        """
        cols = self._col_to_list(columns)
        df = self.data if data is None else data.copy()
        unscale_suffix = '_unscaled' if self._standard_suffix != '' else ''
        for c in cols:
            # Apply eventual column name mapping
            _c = c if columns_mapping.get(c) is None else columns_mapping.get(c)
            # Retrieve mean and standard deviation
            _mean = self.scalers[_c]['mean'] if self.scalers[_c]['center'] else 0.
            _std = self.scalers[_c]['std'] if self.scalers[_c]['reduce'] else 1.
            df[c + unscale_suffix] = (df[c + self._standard_suffix] * _std) + _mean
        return df
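
# A minimal round-trip sketch (hypothetical frame df): Normalize centers and
# reduces the selected columns; unscaleNormalize reverses the operation using
# the statistics stored when the Scaler was constructed.
#
# >>> scaler = Scaler(data=df, columns=['X1', 'X2'])
# >>> normed = scaler.Normalize()
# >>> restored = scaler.unscaleNormalize(normed)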