Source code for statinf.ml.performance

import warnings
import pandas as pd
import numpy as np
import math

from ..misc import format_object


class BinaryPerformance:
    def __init__(self, y_true, y_pred):
        """Gives detailed performance metrics for binary classification models.

        :param y_true: Array of true targets.
        :type y_true: :obj:`numpy.ndarray`
        :param y_pred: Array of predicted targets.
        :type y_pred: :obj:`numpy.ndarray`
        """
        warnings.filterwarnings('ignore')
        # Format y_true and y_pred
        true = format_object(y_true, to_type='list', name='y_true')
        pred = format_object(y_pred, to_type='list', name='y_pred')
        # Put data to a DF
        for_conf = pd.DataFrame({'true': true, 'pred': pred, 'perf': ''})
        # Compute True and False positives/negatives
        for_conf.loc[(for_conf.true == 1) & (for_conf.pred == 1), 'perf'] = 'true_positive'
        for_conf.loc[(for_conf.true == 0) & (for_conf.pred == 0), 'perf'] = 'true_negative'
        for_conf.loc[(for_conf.true == 1) & (for_conf.pred == 0), 'perf'] = 'false_negative'
        for_conf.loc[(for_conf.true == 0) & (for_conf.pred == 1), 'perf'] = 'false_positive'
        # Number of True and False positives/negatives
        self.tp = len(for_conf[for_conf.perf == 'true_positive'])
        self.tn = len(for_conf[for_conf.perf == 'true_negative'])
        self.fn = len(for_conf[for_conf.perf == 'false_negative'])
        self.fp = len(for_conf[for_conf.perf == 'false_positive'])
        self.m = len(pred)
        # Confusion matrix
        self.conf = pd.DataFrame({'True 0': [0., 0.], 'True 1': [0., 0.]},
                                 index=['Predicted 0', 'Predicted 1'])
        # Fill confusion matrix with the share of each outcome
        self.conf.loc['Predicted 0', 'True 0'] = self.tn / self.m
        self.conf.loc['Predicted 1', 'True 0'] = self.fp / self.m
        self.conf.loc['Predicted 0', 'True 1'] = self.fn / self.m
        self.conf.loc['Predicted 1', 'True 1'] = self.tp / self.m
        warnings.filterwarnings('default')

    def accuracy(self):
        """Binary accuracy of the model, i.e. the share of predictions in :obj:`y_pred` equal to the true values in :obj:`y_true`.

        :formula: .. math:: accuracy = \\dfrac{TP + TN}{n}

        :return: Accuracy
        :rtype: :obj:`float`
        """
        return (self.tp + self.tn) / self.m

    def confusion(self):
        """Confusion matrix, expressed in percentages of the sample.

        :return: Confusion matrix

            +-----------------+------------+------------+
            |                 | **True 0** | **True 1** |
            +=================+============+============+
            | **Predicted 0** | :math:`TN` | :math:`FN` |
            +-----------------+------------+------------+
            | **Predicted 1** | :math:`FP` | :math:`TP` |
            +-----------------+------------+------------+

        :rtype: :obj:`pandas.DataFrame`
        """
        return self.conf * 100

    def precision(self):
        """Precision metric: proportion of predicted positives that are actually positive.

        :formula: .. math:: precision = \\dfrac{TP}{TP + FP}

        :return: Precision
        :rtype: :obj:`float`
        """
        return self.tp / (self.tp + self.fp)

    def recall(self):
        """Recall metric: proportion of actual positives that were correctly predicted.

        :formula: .. math:: recall = \\dfrac{TP}{TP + FN}

        :return: Recall
        :rtype: :obj:`float`
        """
        return self.tp / (self.tp + self.fn)

    def F1_score(self):
        """F1-score, the harmonic mean of precision and recall.

        :formula: .. math:: F_{1} = 2 \\cdot \\dfrac{precision \\times recall}{precision + recall}

        :return: F1-score
        :rtype: :obj:`float`
        """
        return 2 * (self.precision() * self.recall()) / (self.precision() + self.recall())
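
A minimal usage sketch (an editor's illustration, not part of the module source): assuming statinf is installed, the hypothetical arrays y_obs and y_hat below exercise the metrics defined by BinaryPerformance.

import numpy as np
from statinf.ml.performance import BinaryPerformance

# Hypothetical binary targets and predictions
y_obs = np.array([1, 0, 1, 1, 0, 0])
y_hat = np.array([1, 0, 0, 1, 0, 1])

perf = BinaryPerformance(y_true=y_obs, y_pred=y_hat)
print(perf.accuracy())   # (TP + TN) / n = (2 + 2) / 6
print(perf.precision())  # TP / (TP + FP) = 2 / 3
print(perf.recall())     # TP / (TP + FN) = 2 / 3
print(perf.F1_score())   # harmonic mean of precision and recall
print(perf.confusion())  # 2x2 matrix of shares, scaled to percentages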

def mean_squared_error(y_true, y_pred, root=False):
    """Mean Squared Error.

    :param y_true: Real values on which to compare.
    :type y_true: :obj:`numpy.ndarray`
    :param y_pred: Predicted values.
    :type y_pred: :obj:`numpy.ndarray`
    :param root: Return the Root Mean Squared Error (RMSE), defaults to False.
    :type root: :obj:`bool`, optional

    :formula: :math:`loss = \\dfrac{1}{m} \\times \\sum_{i=1}^{m} (y_i - \\hat{y}_i)^2`

    :references: * Friedman, J., Hastie, T. and Tibshirani, R., 2001. `The elements of statistical learning <https://web.stanford.edu/~hastie/Papers/ESLII.pdf>`_. Ch. 2, pp. 24.

    :return: Mean Squared Error or its root.
    :rtype: :obj:`float`
    """
    true = format_object(y_true, to_type='array', name='y_true')
    pred = format_object(y_pred, to_type='array', name='y_pred')
    # Compute the squared differences
    loss = (pred - true) ** 2
    if root:
        return math.sqrt(loss.mean())
    else:
        return loss.mean()
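
A short, hedged example of the two return modes (MSE and its root); the toy arrays are hypothetical.

import numpy as np
from statinf.ml.performance import mean_squared_error

y_obs = np.array([3.0, -0.5, 2.0, 7.0])
y_hat = np.array([2.5, 0.0, 2.0, 8.0])

print(mean_squared_error(y_obs, y_hat))             # mean of squared errors: 0.375
print(mean_squared_error(y_obs, y_hat, root=True))  # RMSE: sqrt(0.375) ≈ 0.612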
# MAPE formula
def mape(y_true, y_pred, weights=False):
    """Computes the Mean Absolute Percentage Error (MAPE) or the Weighted Mean Absolute Percentage Error (WMAPE).

    :param y_true: Real values on which to compare.
    :type y_true: :obj:`numpy.ndarray`
    :param y_pred: Predicted values.
    :type y_pred: :obj:`numpy.ndarray`
    :param weights: Compute the WMAPE instead of the MAPE, defaults to False.
    :type weights: :obj:`bool`, optional

    :formula:
        * :math:`MAPE(y, \\hat{y}) = \\dfrac{100}{n} \\sum_{i=1}^{n} \\dfrac{|y_i - \\hat{y}_i|}{y_i}`
        * :math:`WMAPE(y, \\hat{y}) = 100 \\dfrac{\\sum_{i=1}^{n} \\dfrac{|y_i - \\hat{y}_i|}{y_i} \\times y_i}{\\sum_{i=1}^{n} y_i}`

    :return: Mean Absolute Percentage Error, as a percentage.
    :rtype: :obj:`float`
    """
    warnings.filterwarnings('ignore')
    # Format y_true and y_pred
    _true = format_object(y_true, to_type='list', name='y_true')
    _pred = format_object(y_pred, to_type='list', name='y_pred')
    preds = pd.DataFrame({'true': _true, 'pred': _pred})
    # Absolute error and its size relative to the true value
    preds['abs_err'] = np.abs(preds['true'] - preds['pred'])
    preds['err_contrib'] = preds['abs_err'] / preds['true']
    if weights:
        # WMAPE: weight each relative error by its true value
        preds['err_contrib'] = preds['err_contrib'] * preds['true'] * 100
        out = preds['err_contrib'].sum() / preds['true'].sum()
    else:
        # MAPE: average of the relative errors, expressed in percent
        out = 100 * preds['err_contrib'].mean()
    warnings.filterwarnings('default')
    return out
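
A sketch contrasting MAPE and WMAPE on hypothetical data, assuming the implementation follows the formulas documented above; larger actual values weigh more heavily in the WMAPE.

import numpy as np
from statinf.ml.performance import mape

y_obs = np.array([100., 200., 400.])
y_hat = np.array([90., 210., 480.])

# Relative errors are 10%, 5% and 20%
print(mape(y_obs, y_hat))                # MAPE: 100/3 * (0.10 + 0.05 + 0.20) ≈ 11.67
print(mape(y_obs, y_hat, weights=True))  # WMAPE: 100 * (10 + 10 + 80) / 700 ≈ 14.29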