# Source code for statinf.stats.descriptive

import numpy as np
from ..data import rankdata

# ##### Variance

def var(x, std=False, df=1):
    """Compute the variance (or standard deviation) of a variable.

    :param x: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x: :obj:`numpy.array`
    :param std: Returns standard deviation, i.e. :math:`\\sqrt{\\mathbb{V}(\\mathbf{X})}`, defaults to False.
    :type std: :obj:`bool`, optional
    :param df: Degrees of freedom subtracted from the number of observations in the denominator, defaults to 1.
    :type df: :obj:`int`, optional

    :formula: .. math:: \\mathbb{V}(\\mathbf{X}) = \\dfrac{1}{n - df} \\sum_{i = 1}^{n} (X_{i} - \\bar{X})^{2}

    :example:

    >>> from statinf import stats
    >>> x = [0.023699, 0.021436, 0.0200109, 0.0202762, 0.0165271, 0.01027]
    >>> stats.var(x)
    ... 2.2492979044000003e-05

    :return: Variance, or its square root when ``std`` is True.
    :rtype: :obj:`float`
    """
    values = np.asarray(x)
    deviations = values - values.mean()
    # The denominator is n - df: df=1 divides by n - 1, df=0 divides by n.
    s2 = (deviations ** 2).sum() / (len(values) - df)
    return np.sqrt(s2) if std else s2

# ##### Covariance

def cov(x, y):
    """Compute the sample covariance between two variables.

    :param x: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x: :obj:`numpy.array`
    :param y: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type y: :obj:`numpy.array`

    :formula: .. math:: Cov(\\mathbf{X}, \\mathbf{Y}) = \\dfrac{ \\sum_{i = 1}^{n} (X_{i} - \\bar{X}) (Y_{i} - \\bar{Y}) }{n - 1}

    :example:

    >>> from statinf import stats
    >>> x = [0.023699, 0.021436, 0.0200109, 0.0202762, 0.0165271, 0.01027]
    >>> y = [9.4228, 9.27951, 9.167963, 9.68820, 9.56490, 7.543]
    >>> stats.cov(x, y)
    ... 0.003047229298620001

    :reference: * DeGroot, M. H., & Schervish, M. J. (2012). Probability and statistics. Pearson Education.

    :return: Covariance value.
    :rtype: :obj:`float`
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Centre each variable on its own mean.
    x_centred = x - x.mean()
    y_centred = y - y.mean()

    # The number of observations is taken from x; the divisor is n - 1.
    return (x_centred * y_centred).sum() / (len(x) - 1)

# ##### Pearson's correlation coefficient

def pearson(x, y):
    """Compute Pearson's correlation coefficient.

    :param x: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x: :obj:`numpy.array`
    :param y: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type y: :obj:`numpy.array`

    :formula: .. math:: \\rho = \\dfrac{ Cov(X, Y) }{\\sigma_{X} \\sigma_{Y}}

    where :math:`\\sigma_{Z} = \\sqrt{\\mathbb{V}(Z)}`

    :example:

    >>> from statinf import stats
    >>> x = [0.023699, 0.021436, 0.0200109, 0.0202762, 0.0165271, 0.01027]
    >>> y = [9.4228, 9.27951, 9.167963, 9.68820, 9.56490, 7.543]
    >>> round(stats.pearson(x, y), 6)
    ... 0.812504

    :reference: * DeGroot, M. H., & Schervish, M. J. (2012). Probability and statistics. Pearson Education.

    :return: Pearson's correlation coefficient. The result is ``nan`` when either input has zero variance.
    :rtype: :obj:`float`
    """
    x = np.asarray(x)
    y = np.asarray(y)

    x_centred = x - x.mean()
    y_centred = y - y.mean()

    # Covariance and both standard deviations must share the same divisor.
    # Pairing a covariance divided by n - 1 with standard deviations divided
    # by n inflates the ratio by n / (n - 1) and can push it above 1.
    # Working on the raw sums makes the divisor cancel exactly.
    cross_sum = (x_centred * y_centred).sum()
    scale = np.sqrt((x_centred ** 2).sum() * (y_centred ** 2).sum())
    return cross_sum / scale

# ##### Spearman's rank correlation coefficient

def spearman(x, y):
    """Spearman's rank correlation coefficient.

    :param x: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x: :obj:`numpy.array`
    :param y: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type y: :obj:`numpy.array`

    :formula: .. math:: \\rho = 1 - \\dfrac{ 6 \\sum_{i=1}^{n} d^{2} }{ n (n^{2} - 1)}

    where :math:`d` is the difference between the ranks of paired observations.

    :example:

    >>> from statinf import stats
    >>> x = [0.023699, 0.021436, 0.0200109, 0.0202762, 0.0165271, 0.01027]
    >>> y = [9.4228, 9.27951, 9.167963, 9.68820, 9.56490, 7.543]
    >>> stats.spearman(x, y)
    ... 0.37142857142857144

    :return: Spearman's rank correlation coefficient.
    :rtype: :obj:`float`
    """
    # NOTE(review): this shortcut formula is exact only when the ranks contain
    # no ties; how `rankdata` ranks tied values is not visible here -- confirm.
    n = len(x)
    rk_x = rankdata(x)
    rk_y = rankdata(y)

    # Squared rank difference for each pair of observations.
    squared_gaps = (rk_y - rk_x) ** 2

    return 1 - (6 * squared_gaps.sum()) / (n * (n ** 2 - 1))