"""This module contains some diagnostic functions to diagnose the performance of MCMC sample.

The two most important functions are :func:`multivariate_ess` and :func:`univariate_ess` to calculate the effective
sample size of your samples.
"""
from collections.abc import Mapping
import itertools
import numpy as np
from numpy.linalg import det
from scipy.special import gammaln
from scipy.stats import chi2

from mot.lib.utils import multiprocess_mapping

__author__ = 'Robbert Harms'
__date__ = "2017-03-07"
__maintainer__ = "Robbert Harms"
__email__ = "robbert.harms@maastrichtuniversity.nl"


def multivariate_ess(samples, batch_size_generator=None):
    r"""Estimate the multivariate Effective Sample Size for the samples of every problem.

    This essentially applies :func:`estimate_multivariate_ess` to every problem.

    Args:
        samples (ndarray, dict or generator): either a matrix of shape (d, p, n) with d problems, p parameters
            and n samples, or a dictionary with for every parameter a matrix of shape (d, n) or, finally,
            a generator function that yields sample arrays of shape (p, n).
        batch_size_generator (MultiVariateESSBatchSizeGenerator): the batch size generator, tells us how many
            batches and of which size we use in estimating the minimum ESS.

    Returns:
        ndarray: the multivariate ESS per problem
    """
    samples_generator = _get_sample_generator(samples)
    return np.array(multiprocess_mapping(_MultivariateESSMultiProcessing(batch_size_generator),
                                         samples_generator()))


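# Usage sketch (added illustration; the data is synthetic and purely hypothetical): chains stored
# as a (d, p, n) array yield one multivariate ESS value per problem.
#
#     >>> import numpy as np
#     >>> samples = np.random.randn(5, 3, 2000)  # 5 problems, 3 parameters, 2000 samples each
#     >>> multivariate_ess(samples).shape
#     (5,)

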
class _MultivariateESSMultiProcessing:

    def __init__(self, batch_size_generator):
        """Used in the function :func:`multivariate_ess` to estimate the multivariate ESS using multiprocessing."""
        self._batch_size_generator = batch_size_generator

    def __call__(self, samples):
        return estimate_multivariate_ess(samples, batch_size_generator=self._batch_size_generator)


def univariate_ess(samples, method='standard_error', **kwargs):
    r"""Estimate the univariate Effective Sample Size for the samples of every problem.

    This computes the ESS using:

    .. math::

        ESS(X) = n * \frac{\lambda^{2}}{\sigma^{2}}

    Where :math:`\lambda` is the standard deviation of the chain and :math:`\sigma` is estimated using the
    Monte Carlo standard error (which in turn is, by default, estimated using a batch means estimator).

    Args:
        samples (ndarray, dict or generator): either a matrix of shape (d, p, n) with d problems, p parameters
            and n samples, or a dictionary with for every parameter a matrix of shape (d, n) or, finally,
            a generator function that yields sample arrays of shape (p, n).
        method (str): one of 'autocorrelation' or 'standard_error', defaults to 'standard_error'.
            If 'autocorrelation' is chosen we apply the function
            :func:`estimate_univariate_ess_autocorrelation`, if 'standard_error' is chosen we apply the
            function :func:`estimate_univariate_ess_standard_error`.
        **kwargs: passed to the chosen compute method

    Returns:
        ndarray: a matrix of size (d, p) with for every problem and every parameter an ESS.

    References:
        * Flegal, J.M., Haran, M., and Jones, G.L. (2008). "Markov chain Monte Carlo: Can We Trust the
          Third Significant Figure?". Statistical Science, 23, p. 250-260.
        * Meketon, M.S. and Schmeiser, B. (1984). "Overlapping batch means: something for nothing?".
          In Proceedings of the 16th conference on Winter simulation (WSC '84), IEEE Press,
          Piscataway, NJ, USA, 226-230.
    """
    samples_generator = _get_sample_generator(samples)
    return np.array(multiprocess_mapping(_UnivariateESSMultiProcessing(method, **kwargs), samples_generator()))


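# Usage sketch (added illustration; the parameter names and data are hypothetical): with a
# dictionary input, every entry holds a (d, n) matrix and the result is one ESS per problem
# per parameter.
#
#     >>> import numpy as np
#     >>> samples = {'theta': np.random.randn(5, 2000), 'phi': np.random.randn(5, 2000)}
#     >>> univariate_ess(samples, method='standard_error').shape
#     (5, 2)

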
class _UnivariateESSMultiProcessing:

    def __init__(self, method, **kwargs):
        """Used in the function :func:`univariate_ess` to estimate the univariate ESS using multiprocessing."""
        self._method = method
        self._kwargs = kwargs

    def __call__(self, samples):
        if self._method == 'autocorrelation':
            compute_func = estimate_univariate_ess_autocorrelation
        else:
            compute_func = estimate_univariate_ess_standard_error

        result = np.zeros(samples.shape[0])
        for param_ind in range(samples.shape[0]):
            result[param_ind] = compute_func(samples[param_ind], **self._kwargs)
        return result


def _get_sample_generator(samples):
    """Get a sample generator from the given polymorphic input.

    Args:
        samples (ndarray, dict or generator): either a matrix of shape (d, p, n) with d problems, p parameters
            and n samples, or a dictionary with for every parameter a matrix of shape (d, n) or, finally,
            a generator function that yields sample arrays of shape (p, n). When a dictionary is given,
            the parameters are ordered by sorted key.

    Returns:
        generator: a generator that yields a matrix of size (p, n) for every problem in the input.
    """
    if isinstance(samples, Mapping):
        def samples_generator():
            for ind in range(samples[list(samples.keys())[0]].shape[0]):
                yield np.array([samples[s][ind, :] for s in sorted(samples)])
    elif isinstance(samples, np.ndarray):
        def samples_generator():
            for ind in range(samples.shape[0]):
                yield samples[ind]
    else:
        samples_generator = samples
    return samples_generator


def get_auto_correlation(chain, lag):
    r"""Estimates the autocorrelation for the given chain (1d vector) with the given lag.

    Given a lag :math:`k`, the autocorrelation coefficient :math:`\rho_{k}` is estimated as:

    .. math::

        \hat{\rho}_{k} = \frac{E[(X_{t} - \mu)(X_{t + k} - \mu)]}{\sigma^{2}}

    Please note that this equation only works for lags :math:`k < n` where :math:`n` is the number of
    samples in the chain.

    Args:
        chain (ndarray): the vector with the samples
        lag (int): the lag to use in the autocorrelation computation

    Returns:
        float: the autocorrelation with the given lag
    """
    normalized_chain = chain - np.mean(chain, dtype=np.float64)
    lagged_mean = np.mean(normalized_chain[:len(chain) - lag] * normalized_chain[lag:], dtype=np.float64)
    return lagged_mean / np.var(chain, dtype=np.float64)


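# Usage sketch (added illustration with synthetic chains): a smooth, slowly varying chain has a
# lag-1 autocorrelation near 1, while white noise has a lag-1 autocorrelation near 0.
#
#     >>> import numpy as np
#     >>> smooth_chain = np.sin(np.linspace(0, 10 * np.pi, 1000))
#     >>> get_auto_correlation(smooth_chain, lag=1)    # close to 1
#     >>> noise_chain = np.random.randn(100000)
#     >>> get_auto_correlation(noise_chain, lag=1)     # close to 0

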
def get_auto_correlation_time(chain, max_lag=None):
    r"""Compute the sum of autocorrelation coefficients up to the given lag for the given chain (1d vector).

    This will halt when the maximum lag :math:`m` is reached or when, at an even lag, the sum of two
    consecutive autocorrelation coefficients drops to or below zero.

    The returned value is the sum :math:`\sum_{k=1}^{m}{\rho_{k}}`, which is used to form the
    autocorrelation time:

    .. math::

        \tau = 1 + 2 * \sum_{k=1}^{m}{\rho_{k}}

    Where :math:`\rho_{k}` is estimated as:

    .. math::

        \hat{\rho}_{k} = \frac{E[(X_{t} - \mu)(X_{t + k} - \mu)]}{\sigma^{2}}

    Args:
        chain (ndarray): the vector with the samples
        max_lag (int): the maximum lag to use in the autocorrelation computation. If not given we use:
            :math:`min(n/3, 1000)`.

    Returns:
        float: the sum of the autocorrelation coefficients :math:`\sum_{k=1}^{m}{\rho_{k}}`
    """
    max_lag = max_lag or min(len(chain) // 3, 1000)

    normalized_chain = chain - np.mean(chain, dtype=np.float64)

    previous_accoeff = 0
    auto_corr_sum = 0

    for lag in range(1, max_lag):
        auto_correlation_coeff = np.mean(normalized_chain[:len(chain) - lag] * normalized_chain[lag:],
                                         dtype=np.float64)

        if lag % 2 == 0:
            if previous_accoeff + auto_correlation_coeff <= 0:
                break

        auto_corr_sum += auto_correlation_coeff
        previous_accoeff = auto_correlation_coeff

    return auto_corr_sum / np.var(chain, dtype=np.float64)


def estimate_univariate_ess_autocorrelation(chain, max_lag=None):
    r"""Estimate the effective sample size (ESS) using the autocorrelation of the chain.

    The ESS is an estimate of the size of an iid sample with the same variance as the current sample.
    This function implements the ESS as described in Kass et al. (1998) and Robert and Casella (2004; p. 500):

    .. math::

        ESS(X) = \frac{n}{\tau} = \frac{n}{1 + 2 * \sum_{k=1}^{m}{\rho_{k}}}

    where :math:`\rho_{k}` is estimated as:

    .. math::

        \hat{\rho}_{k} = \frac{E[(X_{t} - \mu)(X_{t + k} - \mu)]}{\sigma^{2}}

    References:
        * Kass, R.E., Carlin, B.P., Gelman, A., and Neal, R. (1998). "Markov chain Monte Carlo in practice:
          A roundtable discussion". The American Statistician, 52, 93-100.
        * Robert, C.P. and Casella, G. (2004). Monte Carlo Statistical Methods. New York: Springer.
        * Geyer, C.J. (1992). "Practical Markov chain Monte Carlo". Statistical Science, 7, 473-483.

    Args:
        chain (ndarray): the chain for which to calculate the ESS, assumes a vector of length ``n`` samples
        max_lag (int): the maximum lag used in the variance calculations. If not given, defaults to
            :math:`min(n/3, 1000)`.

    Returns:
        float: the estimated ESS
    """
    return len(chain) / (1 + 2 * get_auto_correlation_time(chain, max_lag))


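# Usage sketch (added illustration with a synthetic AR(1) chain): for an AR(1) process with
# coefficient phi, the autocorrelation time is (1 + phi) / (1 - phi), so with phi = 0.5 the ESS
# should come out at roughly a third of the chain length.
#
#     >>> import numpy as np
#     >>> chain = np.zeros(10000)
#     >>> for t in range(1, len(chain)):
#     ...     chain[t] = 0.5 * chain[t - 1] + np.random.randn()
#     >>> estimate_univariate_ess_autocorrelation(chain)    # roughly 10000 / 3

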
def estimate_univariate_ess_standard_error(chain, batch_size_generator=None, compute_method=None):
    r"""Compute the univariate ESS using the standard error method.

    This computes the ESS using:

    .. math::

        ESS(X) = n * \frac{\lambda^{2}}{\sigma^{2}}

    Where :math:`\lambda` is the standard deviation of the chain and :math:`\sigma` is estimated using the
    Monte Carlo standard error (which in turn is, by default, estimated using a batch means estimator).

    Args:
        chain (ndarray): the Markov chain
        batch_size_generator (UniVariateESSBatchSizeGenerator): the method that generates the batch sizes
            we will use. Per default it uses the :class:`SquareRootSingleBatch` method.
        compute_method (ComputeMonteCarloStandardError): the method used to compute the standard error.
            By default we will use the :class:`BatchMeansMCSE` method.

    Returns:
        float: the estimated ESS
    """
    # sigma holds \sigma^2: the squared Monte Carlo standard error scaled by the chain length
    sigma = (monte_carlo_standard_error(chain, batch_size_generator=batch_size_generator,
                                        compute_method=compute_method) ** 2 * len(chain))
    # lambda_ holds \lambda^2: the variance of the chain
    lambda_ = np.var(chain, dtype=np.float64)
    return len(chain) * (lambda_ / sigma)


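# Usage sketch (added illustration): for iid draws the ESS should be roughly the chain length;
# the batch size generator and compute method can be swapped for the other classes defined
# further down in this module.
#
#     >>> import numpy as np
#     >>> chain = np.random.randn(10000)
#     >>> estimate_univariate_ess_standard_error(chain)    # roughly 10000
#     >>> estimate_univariate_ess_standard_error(chain, compute_method=OverlappingBatchMeansMCSE())

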
def minimum_multivariate_ess(nmr_params, alpha=0.05, epsilon=0.05):
    r"""Calculate the minimum multivariate Effective Sample Size you will need to obtain the desired precision.

    This implements the inequality from Vats et al. (2016):

    .. math::

        \widehat{ESS} \geq \frac{2^{2/p}\pi}{(p\Gamma(p/2))^{2/p}} \frac{\chi^{2}_{1-\alpha,p}}{\epsilon^{2}}

    Where :math:`p` is the number of free parameters.

    Args:
        nmr_params (int): the number of free parameters in the model
        alpha (float): the level of confidence of the confidence region. For example, an alpha of 0.05 means
            that we want to be in a 95% confidence region.
        epsilon (float): the level of precision in our multivariate ESS estimate. An epsilon of 0.05 means
            that we expect that the Monte Carlo error is 5% of the uncertainty in the target distribution.

    Returns:
        int: the minimum multivariate Effective Sample Size that one should aim for in MCMC samples to
            obtain the desired confidence region with the desired precision.

    References:
        Vats, D., Flegal, J., and Jones, G. (2016). "Multivariate Output Analysis for Markov Chain
        Monte Carlo". arXiv:1512.07713v2 [math.ST]
    """
    tmp = 2.0 / nmr_params
    log_min_ess = tmp * np.log(2) + np.log(np.pi) - tmp * (np.log(nmr_params) + gammaln(nmr_params / 2)) \
        + np.log(chi2.ppf(1 - alpha, nmr_params)) - 2 * np.log(epsilon)
    return int(round(np.exp(log_min_ess)))


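# Worked example (added illustration, following the formula above): for p = 2 parameters at the
# default alpha = 0.05 and epsilon = 0.05 the bound evaluates to approximately 7529 effective
# samples; since epsilon enters as 1 / epsilon^2, relaxing it to 0.1 divides the requirement by four.
#
#     >>> minimum_multivariate_ess(2)                 # approximately 7529
#     >>> minimum_multivariate_ess(2, epsilon=0.1)    # approximately 7529 / 4

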
def multivariate_ess_precision(nmr_params, multi_variate_ess, alpha=0.05):
    r"""Calculate the precision given your multivariate Effective Sample Size.

    Given that you obtained :math:`ESS` multivariate effective samples in your estimate you can calculate
    the precision with which you approximated your desired confidence region.

    This implements the inequality from Vats et al. (2016), slightly restructured to give :math:`\epsilon`
    back instead of the minimum ESS:

    .. math::

        \epsilon = \sqrt{\frac{2^{2/p}\pi}{(p\Gamma(p/2))^{2/p}} \frac{\chi^{2}_{1-\alpha,p}}{\widehat{ESS}}}

    Where :math:`p` is the number of free parameters and ESS is the multivariate ESS from your samples.

    Args:
        nmr_params (int): the number of free parameters in the model
        multi_variate_ess (int): the number of iid samples you obtained in your sample results.
        alpha (float): the level of confidence of the confidence region. For example, an alpha of 0.05 means
            that we want to be in a 95% confidence region.

    Returns:
        float: the epsilon, i.e. the level of precision of the confidence region given the obtained
            multivariate ESS.

    References:
        Vats, D., Flegal, J., and Jones, G. (2016). "Multivariate Output Analysis for Markov Chain
        Monte Carlo". arXiv:1512.07713v2 [math.ST]
    """
    tmp = 2.0 / nmr_params
    log_min_ess = tmp * np.log(2) + np.log(np.pi) - tmp * (np.log(nmr_params) + gammaln(nmr_params / 2)) \
        + np.log(chi2.ppf(1 - alpha, nmr_params)) - np.log(multi_variate_ess)
    return np.sqrt(np.exp(log_min_ess))


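# Worked example (added illustration): this is the inverse of :func:`minimum_multivariate_ess`;
# plugging the minimum ESS from the example above back in recovers the precision it was computed for.
#
#     >>> multivariate_ess_precision(2, 7529)    # approximately 0.05

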
def estimate_multivariate_ess_sigma(samples, batch_size):
    r"""Calculates the Sigma matrix which is part of the multivariate ESS calculation.

    This implementation is based on the Matlab implementation found at: https://github.com/lacerbi/multiESS

    The Sigma matrix is defined as:

    .. math::

        \Sigma = \Lambda + 2 * \sum_{k=1}^{\infty}{Cov(Y_{1}, Y_{1+k})}

    Where :math:`Y` are our samples and :math:`\Lambda` is the covariance matrix of the samples.

    This implementation computes the :math:`\Sigma` matrix using a batch means estimator with the given
    batch size. The batch size has to be :math:`1 \le b_n \le n` and a typical value is either
    :math:`\lfloor n^{1/2} \rfloor` for slowly mixing chains or :math:`\lfloor n^{1/3} \rfloor` for
    reasonably mixing chains.

    If the length of the chain is longer than the sum of the lengths of all the batches, this implementation
    calculates :math:`\Sigma` for every offset and returns the average over those offsets.

    Args:
        samples (ndarray): the samples for which we compute the sigma matrix. Expects a (p, n) array with
            p the number of parameters and n the sample size
        batch_size (int): the batch size used in the approximation of the correlation covariance

    Returns:
        ndarray: a (p, p) array with p the number of parameters in the samples.

    References:
        Vats, D., Flegal, J., and Jones, G. (2016). "Multivariate Output Analysis for Markov Chain
        Monte Carlo". arXiv:1512.07713v2 [math.ST]
    """
    sample_means = np.mean(samples, axis=1, dtype=np.float64)
    nmr_params, chain_length = samples.shape
    nmr_batches = int(np.floor(chain_length / batch_size))
    sigma = np.zeros((nmr_params, nmr_params))

    nmr_offsets = chain_length - nmr_batches * batch_size + 1
    for offset in range(nmr_offsets):
        batches = np.reshape(samples[:, np.array(offset + np.arange(0, nmr_batches * batch_size), dtype=int)].T,
                             [batch_size, nmr_batches, nmr_params], order='F')
        batch_means = np.squeeze(np.mean(batches, axis=0, dtype=np.float64))
        Z = batch_means - sample_means

        for x, y in itertools.product(range(nmr_params), range(nmr_params)):
            sigma[x, y] += np.sum(Z[:, x] * Z[:, y])

    return sigma * batch_size / (nmr_batches - 1) / nmr_offsets


def estimate_multivariate_ess(samples, batch_size_generator=None, full_output=False):
    r"""Compute the multivariate Effective Sample Size of your (single instance set of) samples.

    This multivariate ESS is defined in Vats et al. (2016) and is given by:

    .. math::

        ESS = n \bigg(\frac{|\Lambda|}{|\Sigma|}\bigg)^{1/p}

    Where :math:`n` is the number of samples, :math:`p` the number of parameters, :math:`\Lambda` is the
    covariance matrix of the parameters and :math:`\Sigma` captures the covariance structure in the target
    together with the covariance due to correlated samples. :math:`\Sigma` is estimated using
    :func:`estimate_multivariate_ess_sigma`.

    In the case of NaN in any part of the computation, the ESS is set to 0.

    To compute the multivariate ESS for multiple problems, please use :func:`multivariate_ess`.

    Args:
        samples (ndarray): a (p, n) matrix with p parameters and n samples.
        batch_size_generator (MultiVariateESSBatchSizeGenerator): the batch size generator, tells us how many
            batches and of which size we use for estimating the minimum ESS. Defaults to
            :class:`SquareRootSingleBatch`.
        full_output (boolean): set to True to also return the estimated :math:`\Sigma` and the optimal
            batch size.

    Returns:
        float or tuple: when full_output is set to True we return a tuple with the estimated multivariate
            ESS, the estimated :math:`\Sigma` matrix and the optimal batch size. When full_output is False
            (the default) we only return the ESS.

    References:
        Vats, D., Flegal, J., and Jones, G. (2016). "Multivariate Output Analysis for Markov Chain
        Monte Carlo". arXiv:1512.07713v2 [math.ST]
    """
    batch_size_generator = batch_size_generator or SquareRootSingleBatch()
    batch_sizes = batch_size_generator.get_multivariate_ess_batch_sizes(*samples.shape)

    nmr_params, chain_length = samples.shape
    nmr_batches = len(batch_sizes)

    det_lambda = det(np.cov(samples))

    ess_estimates = np.zeros(nmr_batches)
    sigma_estimates = np.zeros((nmr_params, nmr_params, nmr_batches))

    for i in range(0, nmr_batches):
        sigma = estimate_multivariate_ess_sigma(samples, int(batch_sizes[i]))
        ess = chain_length * (det_lambda**(1.0 / nmr_params) / det(sigma)**(1.0 / nmr_params))

        ess_estimates[i] = ess
        sigma_estimates[..., i] = sigma

    ess_estimates = np.nan_to_num(ess_estimates)

    if nmr_batches > 1:
        idx = np.argmin(ess_estimates)
    else:
        idx = 0

    if full_output:
        return ess_estimates[idx], sigma_estimates[..., idx], batch_sizes[idx]
    return ess_estimates[idx]


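# Usage sketch (added illustration with synthetic, uncorrelated draws, for which the multivariate
# ESS should be roughly the chain length):
#
#     >>> import numpy as np
#     >>> samples = np.random.randn(3, 2000)    # 3 parameters, 2000 samples
#     >>> estimate_multivariate_ess(samples)    # roughly 2000
#     >>> ess, sigma, batch_size = estimate_multivariate_ess(samples, full_output=True)

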
def monte_carlo_standard_error(chain, batch_size_generator=None, compute_method=None):
    """Compute the Monte Carlo standard error of the expectation (the mean of the chain).

    This is a convenience function that calls the compute method for each batch size and returns the lowest
    standard error over the used batch sizes.

    Args:
        chain (ndarray): the Markov chain
        batch_size_generator (UniVariateESSBatchSizeGenerator): the method that generates the batch sizes
            we will use. Per default it uses the :class:`SquareRootSingleBatch` method.
        compute_method (ComputeMonteCarloStandardError): the method used to compute the standard error.
            By default we will use the :class:`BatchMeansMCSE` method.

    Returns:
        float: the Monte Carlo standard error
    """
    batch_size_generator = batch_size_generator or SquareRootSingleBatch()
    compute_method = compute_method or BatchMeansMCSE()

    batch_sizes = batch_size_generator.get_univariate_ess_batch_sizes(len(chain))
    return np.min(list(compute_method.compute_standard_error(chain, b) for b in batch_sizes))


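# Usage sketch (added illustration): for 10000 iid draws from N(0, 1) the standard error of the
# mean is 1 / sqrt(10000) = 0.01, which the batch means estimate should approximate.
#
#     >>> import numpy as np
#     >>> chain = np.random.randn(10000)
#     >>> monte_carlo_standard_error(chain)    # roughly 0.01

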
class MultiVariateESSBatchSizeGenerator:
    """Objects of this class are used as input to the multivariate ESS function.

    The multivariate ESS function needs to have at least one batch size to use during the computations.
    More batch sizes are also possible and the batch size with the lowest ESS is then preferred.
    Objects of this class implement the logic behind choosing batch sizes.
    """

    def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length):
        r"""Get the batch sizes to use for the calculation of the Effective Sample Size (ESS).

        This should return a list of batch sizes that the ESS calculation will use to determine
        :math:`\Sigma`.

        Args:
            nmr_params (int): the number of parameters in the samples
            chain_length (int): the length of the chain

        Returns:
            list: the batch sizes we will test in the ESS calculations
        """


class UniVariateESSBatchSizeGenerator:
    """Objects of this class are used as input to the univariate ESS function that uses the batch means.

    The univariate batch means ESS function needs to have at least one batch size to use during the
    computations. More batch sizes are also possible and the batch size with the lowest ESS is then preferred.
    Objects of this class implement the logic behind choosing batch sizes.
    """

    def get_univariate_ess_batch_sizes(self, chain_length):
        r"""Get the batch sizes to use for the calculation of the univariate Effective Sample Size (ESS).

        This should return a list of batch sizes that the ESS calculation will use to determine
        :math:`\sigma`.

        Args:
            chain_length (int): the length of the chain

        Returns:
            list: the batch sizes we will test in the ESS calculations
        """


class SquareRootSingleBatch(MultiVariateESSBatchSizeGenerator, UniVariateESSBatchSizeGenerator):
    r"""Returns a single batch size of :math:`\sqrt{n}`."""

    def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length):
        return [np.floor(chain_length**(1 / 2.0))]

    def get_univariate_ess_batch_sizes(self, chain_length):
        return [np.floor(chain_length**(1 / 2.0))]


class CubeRootSingleBatch(MultiVariateESSBatchSizeGenerator, UniVariateESSBatchSizeGenerator):
    r"""Returns a single batch size of :math:`n^{1/3}`."""

    def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length):
        return [np.floor(chain_length**(1 / 3.0))]

    def get_univariate_ess_batch_sizes(self, chain_length):
        return [np.floor(chain_length**(1 / 3.0))]


class LinearSpacedBatchSizes(MultiVariateESSBatchSizeGenerator):

    def __init__(self, nmr_batches=200):
        r"""Returns a number of batch sizes from which the ESS algorithm will select the one with the lowest ESS.

        This is a conservative choice since the lowest ESS of all batch sizes is chosen.

        The batch sizes are generated as evenly spaced values on a log scale in the interval:

        .. math::

            \Big[ n^{1/4}, \max(\lfloor n/\max(20,p) \rfloor, \lfloor \sqrt{n} \rfloor) \Big]

        where :math:`n` is the chain length and :math:`p` is the number of parameters.

        Args:
            nmr_batches (int): the number of batch sizes we will generate.
        """
        self._nmr_batches = nmr_batches

    def get_multivariate_ess_batch_sizes(self, nmr_params, chain_length):
        b_min = np.floor(chain_length**(1 / 4.0))
        b_max = np.max((np.floor(chain_length / np.max((nmr_params, 20))),
                        np.floor(chain_length**(1 / 2.0))))
        return list(np.unique(np.round(np.exp(np.linspace(np.log(b_min), np.log(b_max), self._nmr_batches)))))


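# Usage sketch (added illustration): for a chain of length 10000 with 3 parameters, the generated
# batch sizes run from n^{1/4} = 10 up to n / 20 = 500, spaced evenly on a log scale.
#
#     >>> LinearSpacedBatchSizes(nmr_batches=5).get_multivariate_ess_batch_sizes(3, 10000)
#     [10.0, 27.0, 71.0, 188.0, 500.0]

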
class ComputeMonteCarloStandardError:
    """Method to compute the Monte Carlo standard error."""

    def compute_standard_error(self, chain, batch_size):
        """Compute the standard error of the given chain with the given batch size.

        Args:
            chain (ndarray): the chain for which to compute the SE
            batch_size (int): batch size or window size to use in the computations

        Returns:
            float: the Monte Carlo standard error
        """
        raise NotImplementedError()


class BatchMeansMCSE(ComputeMonteCarloStandardError):
    """Computes the Monte Carlo standard error using simple (non-overlapping) batch means."""

    def compute_standard_error(self, chain, batch_size):
        nmr_batches = int(np.floor(len(chain) / batch_size))
        chain_mean = np.mean(chain, dtype=np.float64)

        var_sum = 0
        for batch_index in range(nmr_batches):
            batch_mean = np.mean(chain[int(batch_index * batch_size):int((batch_index + 1) * batch_size)],
                                 dtype=np.float64)
            var_sum += (batch_mean - chain_mean)**2

        var_hat = batch_size * var_sum / (nmr_batches - 1)
        return np.sqrt(var_hat / len(chain))


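# Usage sketch (added illustration): for 10000 iid draws from N(0, 1) the batch means estimate
# should be close to the true standard error of the mean, 0.01.
#
#     >>> import numpy as np
#     >>> chain = np.random.randn(10000)
#     >>> BatchMeansMCSE().compute_standard_error(chain, 100)    # roughly 0.01

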
class OverlappingBatchMeansMCSE(ComputeMonteCarloStandardError):
    """Computes the Monte Carlo standard error using overlapping batch means."""

    def compute_standard_error(self, chain, batch_size):
        nmr_batches = int(len(chain) - batch_size + 1)
        chain_mean = np.mean(chain, dtype=np.float64)

        var_sum = 0
        for batch_index in range(nmr_batches):
            batch_mean = np.mean(chain[int(batch_index):int(batch_index + batch_size)], dtype=np.float64)
            var_sum += (batch_mean - chain_mean) ** 2

        var_hat = (len(chain) * batch_size * var_sum) / (nmr_batches - 1) / nmr_batches
        return np.sqrt(var_hat / len(chain))
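

# Usage sketch (added illustration): both estimators target the same standard error; per the
# Meketon and Schmeiser reference cited earlier in this module, the overlapping variant reuses
# the data across windows and typically gives a less variable estimate.
#
#     >>> import numpy as np
#     >>> chain = np.random.randn(10000)
#     >>> BatchMeansMCSE().compute_standard_error(chain, 100)               # roughly 0.01
#     >>> OverlappingBatchMeansMCSE().compute_standard_error(chain, 100)    # roughly 0.01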