Source code for mics.mixtures

"""
.. module:: mixtures
   :platform: Unix, Windows
   :synopsis: a module for defining the class :class:`Mixture`.

.. moduleauthor:: Charlles R. A. Abreu <abreu@eq.ufrj.br>

"""

import numpy as np
import pandas as pd

import mics
from mics.funcs import deltaMethod
from mics.funcs import diff
from mics.funcs import func
from mics.utils import InputError
from mics.utils import bennett
from mics.utils import cases
from mics.utils import crypto
from mics.utils import errorTitle
from mics.utils import info
from mics.utils import multimap
from mics.utils import propertyDict
from mics.utils import stdError


[docs]class mixture:
    """
    A mixture of independently collected samples (MICS).

    Parameters
    ----------
        samples : :class:`pooledsample` or list(:class:`sample`)
            A list of samples.
        engine : :class:`MICS` or :class:`MBAR`
            A method for mixture-model analysis.

    """

    def __init__(self, samples, engine):

        self.samples = samples
        self.engine = engine
        m = self.m = len(samples)
        if mics.verbose:
            # np.set_printoptions(precision=4, threshold=15, edgeitems=4, suppress=True)
            info("\n=== Setting up mixture ===")
            info("Analysis method: ", self.engine.__class__.__name__)
            info("Number of samples:", m)

        if m == 0:
            raise InputError("list of samples is empty")

        self.n = np.array([len(sample.dataset) for sample in samples])
        self.neff = np.array([sample.neff for sample in samples])
        names = self.names = list(samples[0].dataset.columns)
        if mics.verbose:
            info("Sample sizes:", self.n)
            info("Effective sample sizes:", self.neff)
            info("Properties:", ", ".join(names))

        potentials = [sample.potential.lambdify() for sample in samples]
        self.u = [multimap(potentials, sample.dataset) for sample in samples]
        self.f = bennett(self.u)
        mics.verbose and info("Initial free-energy guess:", self.f)
        self.engine.__initialize__(self)

    # ======================================================================================
    def __compute__(self, functions, constants):
        try:
            if isinstance(functions, str):
                funcs = [func(functions, self.names, constants).lambdify()]
            else:
                funcs = [func(f, self.names, constants).lambdify() for f in functions]
            return [multimap(funcs, sample.dataset) for sample in self.samples]
        except (InputError, KeyError):
            return None

    # ======================================================================================
[docs]    def free_energies(self, reference=0):
        """
        Computes the free energies of all sampled states relative to a given
        reference state, as well as their standard errors.

        Parameters
        ----------
            reference : int, optional, default=0
                Specifies which sampled state will be considered as a reference
                for computing free-energy differences.

        Returns
        -------
            pandas.DataFrame
                A data frame containing the free-energy differences and their
                computed standard errors for all sampled states.

        """
        frame = self.samples.__qualifiers__()
        frame["f"] = self.f - self.f[reference]
        T = self.Theta
        frame["df"] = np.sqrt(np.diag(T) - 2*T[:, reference] + T[reference, reference])
        return frame

    # ======================================================================================
[docs]    def reweighting(self, potential, properties={}, derivatives={}, combinations={},
                    conditions={}, reference=0, **constants):
        """
        Computes averages of specified properties at target states defined by
        a given reduced `potential` function with distinct passed parameter
        values, as well as the free energies of such states with respect to a
        sampled `reference` state. Also, computes derivatives of these averages
        and free energies with respect to the mentioned parameters. In addition,
        evaluates combinations of free energies, averages, and derivatives. In
        all cases, uncertainty propagation is handled automatically by means of
        the delta method.

        Parameters
        ----------
            potential : str
                A mathematical expression defining the reduced potential of the
                target states. It might depend on the collective variables of
                the mixture samples, as well as on external parameters whose
                values will be passed via `conditions` or `constants`, such as
                explained below.
            properties : dict(str: str), optional, default={}
                A dictionary associating names to mathematical expressions, thus
                defining a set of properties whose averages must be evaluated at
                the target states. If it is omitted, then only the relative free
                energies of the target states will be evaluated. The expressions
                might depend on the same collective variables and parameters
                mentioned above for `potential`.
            derivatives : dict(str: (str, str)), optional, default={}
                A dictionary associating names to (property, parameter) pairs,
                thus specifying derivatives of average properties at the target
                states or relative free energies of these states with respect
                to external parameters. For each pair, property must be either
                "f" (for free energy) or a name defined in `properties`, while
                parameter must be an external parameter such as described above
                for `potential`.
            combinations : dict(str: str), optional, default={}
                A dictionary associating names to mathematical expressions, thus
                defining combinations among average properties at the target
                states, the relative free energies of these states, and their
                derivatives with respect to external parameters. The expressions
                might depend on "f" (for free energy) or on the names defined in
                `properties`, as well as on external parameters such as described
                above for `potential`.
            conditions : pandas.DataFrame or dict, optional, default={}
                A data frame whose column names are external parameters present
                in mathematical expressions specified in arguments `potential`,
                `properties`, and `combinations`. The rows of the data frame
                contain sets of values of these parameters, in such as way that
                the reweighting is carried out for every single set. This is a
                way of defining multiple target states from a single `potential`
                expression. The same information can be passed as a dictionary
                associating names to lists of numerical values, provided that
                all lists are equally sized. If it is empty, then a unique
                target state will be considered and all external parameters in
                `potential`, if any, must be passed as keyword arguments.
            reference : int, optional, default=0
                The index of a sampled state to be considered as a reference for
                computing relative free energies.
            **constants : keyword arguments
                A set of keyword arguments passed as name=value, aimed to define
                external parameter values for the evaluation of mathematical
                expressions. These values will be repeated at all target states
                specified via `potential` and `conditions`.

        Returns
        -------
            pandas.DataFrame
                A data frame containing the computed quantities, along with
                their estimated uncertainties, at all target states specified
                via `potential` and `conditions`.

        """
        if mics.verbose:
            info("\n=== Performing reweighting with %s ===" % self.engine.__class__.__name__)
            info("Reduced potential:", potential)
            constants and info("Provided constants: ", constants)

        freeEnergy = "f"
        if freeEnergy in properties.keys():
            raise InputError("Word % is reserved for free energies" % freeEnergy)
        condframe = pd.DataFrame(data=conditions) if isinstance(conditions, dict) else conditions
        propfuncs = list(properties.values())

        if not derivatives:
            propnames = [freeEnergy] + list(properties.keys())
            combs = combinations.values()

            gProps = self.__compute__(propfuncs, constants)
            if combinations:
                gDelta = deltaMethod(combs, propnames, constants)

            results = list()
            for (index, condition) in cases(condframe):
                mics.verbose and condition and info("Condition[%s]" % index, condition)
                consts = dict(condition, **constants)
                u = self.__compute__(potential, consts)
                y = gProps if gProps else self.__compute__(propfuncs, consts)
                (yu, Theta) = self.engine.__reweight__(self, u, y, reference)
                result = propertyDict(propnames, yu, stdError(Theta))
                if combinations:
                    delta = gDelta if gDelta.valid else deltaMethod(combs, propnames, consts)
                    (h, dh) = delta.evaluate(yu, Theta)
                    result.update(propertyDict(combinations.keys(), h, dh))
                results.append(result.to_frame(index))

            return condframe.join(pd.concat(results))

        else:
            symbols = list(condframe.columns) + list(constants.keys())
            parameters = set(x for (y, x) in derivatives.values())
            props = dict()
            for x in parameters:
                props[crypto(x)] = diff(potential, x, symbols)
            combs = dict()
            for (z, (y, x)) in derivatives.items():
                if y == freeEnergy:
                    combs[z] = crypto(x)
                else:
                    dydx = diff(properties[y], x, symbols)
                    props[crypto(z)] = "%s - (%s)*(%s)" % (dydx, props[crypto(x)], properties[y])
                    combs[z] = "%s + (%s)*(%s)" % (crypto(z), crypto(x), y)
            unwanted = sum([[x, errorTitle(x)] for x in props.keys()], [])
            return self.reweighting(potential, dict(properties, **props), {},
                                    dict(combs, **combinations), condframe, reference,
                                    **constants).drop(unwanted, axis=1)

    # ======================================================================================
    def pmf(self, potential, property, bins=10, interval=None, **constants):

        if mics.verbose:
            info("\n=== Computing PMF with %s ===" % self.engine.__class__.__name__)
            info("Reduced potential:", potential)
        u = self.__compute__(potential, constants)
        z = self.__compute__(property, constants)
        if interval:
            (zmin, zmax) = interval
        else:
            zmin = min(np.amin(x[0, :]) for x in z)
            zmax = max(np.amax(x[0, :]) for x in z)
        delta = (zmax - zmin)/bins
        ibin = [np.floor((x[0:1, :] - zmin)/delta).astype(int) for x in z]

        results = list()
        for i in range(bins):
            zc = zmin + delta*(i + 0.5)
            mics.verbose and info("Bin[%d]:" % (i + 1), "%s = %s" % (property, str(zc)))
            y = [np.equal(x, i).astype(np.float) for x in ibin]
            (yu, Theta) = self.engine.__reweight__(self, u, y)
            if yu[1] > 0.0:
                dyu = np.sqrt(max(0.0, Theta[1, 1]))
                results.append([zc, -np.log(yu[1]), dyu/yu[1]])

        return pd.DataFrame(results, columns=[property, "pmf", errorTitle("pmf")])

    # ======================================================================================
    def histograms(self, property="u0", bins=100, **constants):
        if property == "u0":
            y = self.u0
        elif property == "state":
            w = np.arange(self.m) + 1
            wsum = sum(w)
            y = [wsum*np.average(p, axis=0, weights=w) for p in self.P]
        elif property == "potential":
            y = [self.u[i][i, :] for i in range(self.m)]
        else:
            y = self.__compute__(property, constants)
        ymin = min([np.amin(x) for x in y])
        ymax = max([np.amax(x) for x in y])
        delta = (ymax - ymin)/bins
        center = [ymin + delta*(i + 0.5) for i in range(bins)]
        frame = pd.DataFrame({property: center})
        for i in range(self.m):
            frame["state %s" % (i+1)] = np.histogram(y[i], bins, (ymin, ymax))[0]
        return frame