# Source code for statinf.stats.tests

import numpy as np
import math
import warnings
from scipy import stats as scp

from . import descriptive as desc
from ..misc import test_summary, format_object


# One sample Student test
def ttest(x, mu=0, alpha=0.05, is_bernoulli=False, two_sided=True, return_tuple=False):
    """One sample Student's test.

    In the two-sided setup, we aim at testing:

    .. math:: H_{0}: \\bar{X} = \\mu \\text{ against } H_{1}: \\bar{X} \\neq \\mu

    The one-sided setup tests :math:`H_{0}: \\bar{X} \\leq \\mu` against
    :math:`H_{1}: \\bar{X} > \\mu`. The p-value is computed as:

    .. math:: \\mathbb{P}(|Z| \\geq |t| \\mid H_{0} \\text{ holds})

    with, under :math:`H_{0}`:

    .. math:: t = \\dfrac{\\bar{X} - \\mu}{\\dfrac{s}{\\sqrt{n}}} \\sim \\mathcal{N}(0, 1)

    if :math:`s = \\mathbb{V}(\\mathbf{X})` is known or if :math:`n \\gg 30`,
    otherwise :math:`t \\sim \\mathcal{T}_{n - 1}`, a Student distribution with
    :math:`n - 1` degrees of freedom. In case of a Bernoulli distribution,
    :math:`s^{2} = \\bar{X}(1 - \\bar{X})`.

    :param x: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x: :obj:`numpy.array`
    :param mu: Theoretical mean to be evaluated in the null hypothesis, defaults to 0.
    :type mu: :obj:`int`, optional
    :param alpha: Confidence level, defaults to 0.05.
    :type alpha: :obj:`float`, optional
    :param is_bernoulli: Input value follows a Bernoulli distribution, i.e.
        :math:`\\mathbf{X} \\sim \\mathcal{B}(p)` with :math:`p \\in [0, 1]`, defaults to False.
    :type is_bernoulli: :obj:`bool`, optional
    :param two_sided: Perform a two-sided test, defaults to True.
    :type two_sided: :obj:`bool`, optional
    :param return_tuple: Return a tuple with t statistic, critical value and p-value, defaults to False.
    :type return_tuple: :obj:`bool`, optional

    :example:

    >>> from statinf import stats
    >>> stats.ttest([30.02, 29.99, 30.11, 29.97, 30.01, 29.99], mu=30)
    ... +------------------------------------------------------------+
    ... |                  One Sample Student test                   |
    ... +------------+----------------+------------+---------+-------+
    ... |     df     | Critical value |  T-value   | p-value |   H0  |
    ... +------------+----------------+------------+---------+-------+
    ... |          5 |   2.5705818366 |  0.7392961 | 0.49295 |  True |
    ... +------------+----------------+------------+---------+-------+
    ...  * Confidence level is 95.0%, we need p > 0.025 for two-sided test
    ...  * We cannot reject the hypothesis H0: X_bar = 30

    :reference: * DeGroot, M. H., & Schervish, M. J. (2012). Probability and statistics. Pearson Education.
        * Student. (1908). The probable error of a mean. Biometrika, 1-25.

    :return: Summary for the test or tuple statistic, critical value, p-value.
    :rtype: :obj:`str` or :obj:`tuple`
    """
    # Quantile order and hypotheses depend on the sidedness of the test
    if two_sided:
        quant_order = 1 - (alpha / 2)
        h0 = f'X_bar = {mu}'
        h1 = f'X_bar != {mu}'
    else:
        quant_order = 1 - alpha
        h0 = f'X_bar <= {mu}'
        h1 = f'X_bar > {mu}'

    # Input vector as array
    x = np.asarray(x)
    # Sample size and empirical mean
    n = len(x)
    x_bar = x.mean()

    # Variance estimate: closed form for a Bernoulli sample, sample variance otherwise
    if is_bernoulli:
        s2 = x_bar * (1 - x_bar)
    else:
        s2 = desc.var(x)

    # Degrees of freedom
    df = n - 1
    # T statistic
    t = (x_bar - mu) / math.sqrt(s2 / n)
    if two_sided:
        t = math.fabs(t)

    # Large samples: normal quantiles (CLT); small samples: Student's T.
    # Fix: critical value and p-value now use the SAME distribution, and the
    # one-sided p-value is no longer doubled.
    if n > 30:
        cv = scp.norm.ppf(quant_order)
        sf = scp.norm.sf(t)
    else:
        cv = scp.t.ppf(quant_order, df=df)
        sf = scp.t.sf(t, df=df)
    p = 2.0 * sf if two_sided else sf

    if return_tuple:
        return t, cv, p

    _summ = test_summary(df=df, critical_value=cv, t_value=t, p_value=p,
                         title='One Sample Student test',
                         h0=h0, h1=h1, alpha=alpha)
    print(_summ)
# 2 samples Student test
def ttest_2samp(x1, x2, alpha=0.05, paired=False, is_bernoulli=False, two_sided=True, return_tuple=False):
    """Two samples Student's test.

    If the samples are independent (:math:`X_{1} \\bot X_{2}`), we test:

    .. math:: H_{0}: \\bar{X}_{1} = \\bar{X}_{2} \\text{ against } H_{1}: \\bar{X}_{1} \\neq \\bar{X}_{2}

    The test statistic is:

    .. math:: t = \\dfrac{\\bar{X}_{1} - \\bar{X}_{2}}{\\sqrt{\\dfrac{(n_{1} - 1) s^{2}_{1} + (n_{2} - 1) s^{2}_{2}}{n_{1} + n_{2} - 2}} \\sqrt{\\dfrac{1}{n_{1}} + \\dfrac{1}{n_{2}}}}

    where:

    * :math:`t \\sim \\mathcal{T}_{n_{1} + n_{2} - 2}`, if both samples are normally distributed
    * :math:`t \\sim \\mathcal{N}(0, 1)`, if :math:`n_{1} \\gg 30` and :math:`n_{2} \\gg 30`

    If samples are paired:

    .. math:: H_{0}: \\bar{X}_{1} = \\bar{X}_{2} \\Leftrightarrow \\bar{X}_{D} = 0

    we run a one sample test on the difference vector :math:`X_{D} = X_{1} - X_{2}`
    and compare it to 0.

    :param x1: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x1: :obj:`numpy.array`
    :param x2: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x2: :obj:`numpy.array`
    :param alpha: Confidence level, defaults to 0.05.
    :type alpha: :obj:`float`, optional
    :param paired: Performs one sample test of the difference if samples are paired, defaults to False.
    :type paired: :obj:`bool`, optional
    :param is_bernoulli: Input variables follow a Bernoulli distribution, i.e.
        :math:`\\mathbf{X} \\sim \\mathcal{B}(p)` with :math:`p \\in [0, 1]`, defaults to False.
    :type is_bernoulli: :obj:`bool`, optional
    :param two_sided: Perform a two-sided test, defaults to True.
    :type two_sided: :obj:`bool`, optional
    :param return_tuple: Return a tuple with t statistic, critical value and p-value, defaults to False.
    :type return_tuple: :obj:`bool`, optional

    :example:

    >>> from statinf import stats
    >>> a = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99]
    >>> b = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98]
    >>> stats.ttest_2samp(a, b)  # prints the test summary table
    >>> t, cv, p = stats.ttest_2samp(a, b, return_tuple=True)  # programmatic access

    :reference: * DeGroot, M. H., & Schervish, M. J. (2012). Probability and statistics. Pearson Education.
        * Student. (1908). The probable error of a mean. Biometrika, 1-25.

    :return: Summary for the test or tuple statistic, critical value, p-value.
    :rtype: :obj:`str` or :obj:`tuple`
    """
    x = np.asarray(x1)
    y = np.asarray(x2)

    # Quantile order and hypotheses depend on the sidedness of the test
    if two_sided:
        quant_order = 1 - (alpha / 2)
        h0 = 'X1_bar = X2_bar'
        h1 = 'X1_bar != X2_bar'
    else:
        quant_order = 1 - alpha
        h0 = 'X1_bar <= X2_bar'
        h1 = 'X1_bar > X2_bar'

    # Sample sizes
    n1, n2 = len(x), len(y)
    mean1, mean2 = x.mean(), y.mean()

    if paired:
        # Paired samples: one-sample Student test on the differences vs 0.
        # Fix: sidedness is now forwarded, and df is n - 1 (one-sample test).
        d = x - y
        t, cv, p = ttest(d, alpha=alpha, two_sided=two_sided, return_tuple=True)
        df = len(d) - 1
    else:
        # Independent samples: variance per sample
        if is_bernoulli:
            s1 = mean1 * (1 - mean1)
            s2 = mean2 * (1 - mean2)
        else:
            s1 = desc.var(x)
            s2 = desc.var(y)

        # Pooled standard deviation
        sp = np.sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2))
        # Degrees of freedom
        df = n1 + n2 - 2
        # T statistic.
        # Fix: the standard error now includes the sqrt(1/n1 + 1/n2) factor
        # from the formula above (it was previously missing).
        t = (mean1 - mean2) / (sp * np.sqrt(1.0 / n1 + 1.0 / n2))

        # Large samples: normal quantiles (CLT); small samples: Student's T.
        # Fix: cv and p use the same distribution; one-sided p is not doubled.
        if (n1 > 30) and (n2 > 30):
            cv = scp.norm.ppf(quant_order)
            sf = scp.norm.sf(math.fabs(t)) if two_sided else scp.norm.sf(t)
        else:
            cv = scp.t.ppf(quant_order, df=df)
            sf = scp.t.sf(math.fabs(t), df=df) if two_sided else scp.t.sf(t, df=df)
        p = 2.0 * sf if two_sided else sf

    if return_tuple:
        return t, cv, p

    extra = f" * E(X1) = {round(mean1, 3)} and E(X2) = {round(mean2, 3)} \n"
    if paired:
        extra += " * Performed test for paired samples \n"
    if (n1 > 30) and (n2 > 30):
        extra += " * Large sample sizes, t ~ N(0, 1) from CLT"
    else:
        extra += ' * Small sample sizes, assumed t ~ T(n-1)'

    _summ = test_summary(df=df, critical_value=cv, t_value=t, p_value=p,
                         title='Two Samples Student test',
                         h0=h0, h1=h1, alpha=alpha, extra=extra)
    print(_summ)
# Kolmogorov-Smirnov test
def kstest(x1, x2='normal', alpha=0.05, return_tuple=False, **kwargs):
    """Kolmogorov-Smirnov test for sample tests.

    We test if two samples come from the same distribution
    :math:`H_{0}: F(x) = G(x)` against :math:`H_{1}:` :math:`F(x)` follows a
    different distribution than :math:`G(x)`. The statistic is:

    .. math:: D_{mn} = \\sup_{-\\infty < x < + \\infty} |F_{n}(x) - G_{m}(x)|

    and the critical value is given by:

    .. math:: c = \\mathbb{P} \\left[ \\left( \\dfrac{mn}{n + m} \\right)^{2} D_{mn} < K_{\\alpha} \\right]

    where :math:`K_{\\alpha}` represents the quantile of order :math:`\\alpha`
    from a Kolmogorov distribution.

    This test is an alternative to the :math:`\\chi^{2}`-test where we compare
    two distributions. By comparing an unknown empirical distribution
    :math:`F_{n}` with a sample :math:`G_{n}` pulled from a known distribution,
    we can assess whether :math:`F_{n}` follows that distribution.

    :param x1: Input variable. Format can be :obj:`numpy.array`, :obj:`list` or :obj:`pandas.Series`.
    :type x1: :obj:`numpy.array`
    :param x2: Sample to be compared. Can be an external empirical sample in the
        same format as :obj:`x1` or the name of a distribution which can be
        'normal', 'beta', 'gamma', 'poisson', 'chisquare', 'exponential' or
        'gumbel', defaults to 'normal'.
    :type x2: :obj:`str`, optional
    :param alpha: Confidence level, defaults to 0.05.
    :type alpha: :obj:`float`, optional
    :param return_tuple: Return a tuple with K statistic, critical value and p-value, defaults to False.
    :type return_tuple: :obj:`bool`, optional

    :example:

    >>> from statinf import stats
    >>> import numpy as np
    >>> stats.kstest(np.random.normal(size=100))
    ... +------------------------------------------------------------+
    ... |                  Kolmogorov-Smirnov test                   |
    ... +------------+----------------+------------+---------+-------+
    ... |   D value  | Critical value |  K-value   | p-value |   H0  |
    ... +------------+----------------+------------+---------+-------+
    ... |       0.09 |  1.35809863932 |  0.6363961 | 0.81275 |  True |
    ... +------------+----------------+------------+---------+-------+
    ...  * We cannot reject the hypothesis H0: F(x) ~ normal
    ...  * Confidence level is 95.0%, we need p > 0.05

    :reference: * DeGroot, M. H., & Schervish, M. J. (2012). Probability and statistics. Pearson Education.
        * Kolmogorov-Smirnov, A. N., Kolmogorov, A., & Kolmogorov, M. (1933). Sulla determinazione empírica di uma legge di distribuzione.
        * Marsaglia, G., Tsang, W. W., & Wang, J. (2003). `Evaluating Kolmogorov’s distribution <https://www.jstatsoft.org/article/view/v008i18>`_. Journal of Statistical Software, 8(18), 1-4.

    :return: Summary for the test or tuple statistic, critical value, p-value.
    :rtype: :obj:`str` or :obj:`tuple`
    """
    quant_order = 1 - alpha

    # Format x1
    x = np.asarray(x1)
    n = len(x)

    if isinstance(x2, str):
        h0 = f"F(x) ~ {x2}"
        h1 = f"F(x) is not {x2}"
        # Dispatch table mapping distribution aliases to numpy samplers;
        # distribution parameters (e.g. a/b for beta, lam for poisson) are
        # forwarded through **kwargs.
        samplers = {
            'normal': np.random.normal, 'norm': np.random.normal,
            'gaussian': np.random.normal, 'gauss': np.random.normal,
            'beta': np.random.beta,
            'gamma': np.random.gamma,
            'poisson': np.random.poisson,
            'chisquare': np.random.chisquare, 'chi-square': np.random.chisquare,
            'chi2': np.random.chisquare, 'x2': np.random.chisquare,
            'exponential': np.random.exponential, 'expo': np.random.exponential,
            'gumbel': np.random.gumbel,
        }
        sampler = samplers.get(x2.lower())
        if sampler is None:
            raise ValueError('cdf value for x2 is not valid.')
        y = sampler(size=n, **kwargs)
    else:
        y = np.asarray(x2)
        h0 = "F(x) = G(x)"
        h1 = "F(x) and G(x) have different distributions"

    # Sample sizes
    m = len(y)

    # Sort values and pool both samples to evaluate the ECDFs
    x_sort = np.sort(x)
    y_sort = np.sort(y)
    concat_data = np.concatenate([x_sort, y_sort])

    # Empirical CDFs of both samples evaluated on the pooled points
    cdf1 = np.searchsorted(x_sort, concat_data, side='right') / float(n)
    cdf2 = np.searchsorted(y_sort, concat_data, side='right') / float(m)

    # KS statistic: largest vertical deviation between the two ECDFs
    d = np.max(np.absolute(cdf1 - cdf2))
    # Scaled statistic, compared against the Kolmogorov distribution
    k = d * math.sqrt((n * m) / (n + m))
    cv = scp.kstwobign.ppf(quant_order)
    p = 1.0 - scp.kstwobign.cdf(math.fabs(k))

    if return_tuple:
        return k, cv, p

    _summ = test_summary(df=n, critical_value=cv, t_value=k, p_value=p,
                         alpha=alpha, title='Kolmogorov-Smirnov test',
                         h0=h0, h1=h1,
                         extra=f' * The D-value is: {round(d, 5)}')
    print(_summ)
# Wilcoxon test
def wilcoxon(x, y=None, alpha=0.05, alternative='two-sided', mode='auto', zero_method='wilcox', return_tuple=False):
    """Wilcoxon signed-rank test.

    :param x: First sample to compare. If `y` is not provided, will correspond to the difference :math:`x - y`.
    :type x: :obj:`numpy.array`
    :param y: Second sample to compare, defaults to None.
    :type y: :obj:`numpy.array`, optional
    :param alpha: Confidence level, defaults to 0.05.
    :type alpha: :obj:`float`, optional
    :param alternative: Perform a one or two-sided test. Values can be `two-sided`, `greater`, `less`, defaults to 'two-sided'.
    :type alternative: :obj:`str`, optional
    :param mode: Method to calculate the p-value. Computes the exact distribution
        if sample size is less than 25, otherwise uses normal approximation.
        Values can be `auto`, `approx` or `exact`, defaults to 'auto'.
    :type mode: :obj:`str`, optional
    :param zero_method: Method to handle the zero differences, defaults to 'wilcox'.
    :type zero_method: :obj:`str`, optional
    :param return_tuple: Return a tuple with the z statistic, critical value and p-value, defaults to False.
    :type return_tuple: :obj:`bool`, optional

    :example:

    >>> from statinf import stats
    >>> import numpy as np
    >>> x = np.random.poisson(2, size=100)
    >>> y = x + np.random.normal(loc=0, scale=1, size=100)
    >>> stats.wilcoxon(x, y)
    ... +------------------------------------------------------------+
    ... |                       Wilcoxon test                        |
    ... +------------+----------------+------------+---------+-------+
    ... |     df     | Critical value | Stat value | p-value |   H0  |
    ... +------------+----------------+------------+---------+-------+
    ... |        100 |   1.9599639845 |  -1.316878 | 0.18788 |  True |
    ... +------------+----------------+------------+---------+-------+
    ...  * We cannot reject H0: x - y ~ symmetric distribution centered in 0
    ...  * The T-value is: 2142.0

    :reference: * Wilcoxon, F., Individual Comparisons by Ranking Methods, Biometrics Bulletin, Vol. 1, 1945, pp. 80-83.
        * Cureton, E.E., The Normal Approximation to the Signed-Rank Sampling Distribution When Zero Differences are Present, Journal of the American Statistical Association, Vol. 62, 1967, pp. 1068-1069.

    :return: Summary for the test or tuple statistic, critical value, p-value.
    :rtype: :obj:`tuple`
    """
    # Code mostly inspired from:
    # https://github.com/scipy/scipy/blob/v1.7.0/scipy/stats/morestats.py#L2984-L3233

    # Quantile order depends on the sidedness; hypotheses are the same either way
    if alternative == 'two-sided':
        quant_order = 1 - (alpha / 2)
    else:
        quant_order = 1 - alpha
    h0 = 'x - y ~ symmetric distribution centered in 0'
    h1 = 'x - y is not a symmetric distribution centered in 0'

    if y is None:
        # If y is not provided, we consider x already corresponds to x - y
        d = format_object(x, to_type='array', name='x')
    else:
        x = format_object(x, to_type='array', name='x')
        y = format_object(y, to_type='array', name='y')
        d = x - y

    if mode == "auto":
        mode = "exact" if len(d) <= 25 else "approx"

    n_zero = np.sum(d == 0)
    if n_zero > 0:
        # Exact distribution cannot handle zero differences
        mode = "approx"
        warnings.warn("Found some ties, switching mode to 'approx.'")

    if mode == "approx":
        if zero_method in ["wilcox", "pratt"]:
            if n_zero == len(d):
                raise ValueError("zero_method 'wilcox' and 'pratt' do not "
                                 "work if x - y is zero for all elements.")

    if zero_method == "wilcox":
        # Keep only the non-zero differences
        d = d[d != 0]

    count = len(d)
    if count < 10 and mode == "approx":
        # Fix: this was a bare `ValueError(...)` expression that was never
        # raised (a no-op); warn instead of silently doing nothing.
        warnings.warn(f"Sample size is too small for normal approximation, got n={count}.")

    # Rank the absolute differences and split the rank sums by sign
    r = scp.rankdata(abs(d))
    r_plus = np.sum((d > 0) * r)
    r_minus = np.sum((d < 0) * r)

    if alternative == "two-sided":
        T = min(r_plus, r_minus)
    else:
        T = r_plus

    # Moments of T under H0. These drive the normal approximation; as a fix,
    # a z statistic is also computed in exact mode (the original left `z`
    # undefined there, crashing on the summary/return).
    mn = count * (count + 1.) * 0.25
    se = count * (count + 1.) * (2. * count + 1.)

    if mode == "approx":
        if zero_method == "pratt":
            r = r[d != 0]
            # Normal approximation needs to be adjusted, see Cureton (1967)
            mn -= n_zero * (n_zero + 1.) * 0.25
            se -= n_zero * (n_zero + 1.) * (2. * n_zero + 1.)
        _, repnum = scp.find_repeats(r)
        if repnum.size != 0:
            # Correction for repeated elements
            se -= 0.5 * (repnum * (repnum * repnum - 1)).sum()

    se = math.sqrt(se / 24)
    # Continuity correction (not applied). Fix: the original reused the
    # variable `d` for this scalar, clobbering the difference array.
    correction = 0
    z = (T - mn - correction) / se

    if mode == "approx":
        # p-value from the normal approximation
        if alternative == "two-sided":
            p = 2. * scp.norm.sf(abs(z))
        elif alternative == "greater":
            # Large T = r_plus indicates x is greater than y, i.e. accept the
            # alternative in that case and return a small p-value (sf)
            p = scp.norm.sf(z)
        else:
            p = scp.norm.cdf(z)
    else:
        # Exact estimation: frequencies cnt of the possible positive rank sums.
        # NOTE(review): relies on a private scipy helper; may move or break
        # across scipy versions — verify against the installed scipy.
        cnt = scp._hypotests._get_wilcoxon_distr(count)
        # r_plus is int (ties not allowed), need int for the slices below
        r_plus = int(r_plus)
        if alternative == "two-sided":
            if r_plus == (len(cnt) - 1) // 2:
                # r_plus is the center of the distribution
                p = 1.0
            else:
                p_less = np.sum(cnt[:r_plus + 1]) / 2**count
                p_greater = np.sum(cnt[r_plus:]) / 2**count
                p = 2 * min(p_greater, p_less)
        elif alternative == "greater":
            p = np.sum(cnt[r_plus:]) / 2**count
        else:
            p = np.sum(cnt[:r_plus + 1]) / 2**count

    cv = scp.norm.ppf(quant_order)

    if return_tuple:
        return z, cv, p

    _summ = test_summary(df=count, critical_value=cv, t_value=z, p_value=p,
                         alpha=alpha, title='Wilcoxon test', h0=h0, h1=h1,
                         extra=f' * The T-value is: {round(T, 5)}')
    print(_summ)