Source code for mics.mixtures

"""
.. module:: mixtures
   :platform: Unix, Windows
   :synopsis: a module for defining the class :class:`Mixture`.

.. moduleauthor:: Charlles R. A. Abreu <abreu@eq.ufrj.br>


"""

import numpy as np
import pandas as pd
from numpy.linalg import multi_dot

from mics.samples import pool
from mics.utils import InputError
from mics.utils import cases
from mics.utils import derivative
from mics.utils import genfunc
from mics.utils import info
from mics.utils import jacobian
from mics.utils import multimap
from mics.utils import overlapSampling


[docs]class mixture:
    """A mixture of independently collected samples (MICS)

        Args:
            samples (list or tuple):
                a list of samples.
            title (str, optional):
                a title.
            verbose (bool, optional):
                a verbosity tag.
            tol (float, optional):
                a tolerance.

    """

    # ======================================================================================
    def __define__(self, samples, title, verbose):

        np.set_printoptions(precision=4, threshold=15, edgeitems=4, suppress=True)

        self.title = title
        self.verbose = verbose
        self.method = type(self).__name__
        verbose and info("Setting up %s case:" % self.method, title)

        m = self.m = len(samples)
        if m == 0:
            raise InputError("list of samples is empty")
        verbose and info("Number of samples:", m)

        if type(samples) is pool:
            self.samples = samples.samples
            self.label = samples.label
        else:
            self.samples = samples
            self.label = ""

        names = self.names = list(samples[0].dataset.columns)
        if any(list(s.dataset.columns) != names for s in samples):
            raise InputError("provided samples have distinct properties")
        verbose and info("Properties:", ", ".join(names))

        n = self.n = np.array([s.dataset.shape[0] for s in samples])
        verbose and info("Sample sizes:", str(self.n))

        potentials = [s.potential for s in samples]
        self.u = [multimap(potentials, s.dataset) for s in samples]

        self.f = overlapSampling(self.u)
        verbose and info("Initial free-energy guess:", self.f)

        neff = self.neff = np.array([s.neff for s in samples])

        self.frame = pd.DataFrame(index=np.arange(m) + 1)
        if self.label:
            self.states = ["%s = %s" % (self.label, s.label) for s in samples]
            self.frame[self.label] = [s.label for s in samples]
        else:
            self.states = ["state %d" % (i+1) for i in range(m)]

        return m, n, neff

    # ======================================================================================
    def compute(self, functions, constants):
        if isinstance(functions, str):
            funcs = [genfunc(functions, self.names, constants)]
        else:
            funcs = [genfunc(f, self.names, constants) for f in functions]
        return [multimap(funcs, s.dataset) for s in self.samples]

    # ======================================================================================
[docs]    def free_energies(self, reference=0):
        """
        Returns a data frame containing the relative free energies of the datasetd samples
        of a `mixture`, as well as their standard errors.

        """
        frame = self.frame.copy()
        frame["f"] = self.f - self.f[reference]
        T = self.Theta
        frame["df"] = np.sqrt(np.diag(T) - 2*T[:, reference] + T[reference, reference])
        return frame

    # ======================================================================================
[docs]    def reweighting(self,
                    potential,
                    properties={},
                    derivatives={},
                    combinations={},
                    conditions=pd.DataFrame(),
                    reference=0,
                    **kwargs):
        """
        Performs reweighting of the properties computed by `functions` from the mixture to
        the samples determined by the provided `potential` with all `parameter` values.

        Args:
            potential (string):
            properties (dict of strings):
            combinations (dict of strings):
            derivatives (dict of tuples):
            conditions (pandas.DataFrame):
            verbose (boolean):
            **kwargs:

        """
        # TODO: look for duplicated names or reserved keyword 'f' in properties
        # TODO: look for duplicated names in properties, derivatives, and combinations
        # TODO: allow limited recursion in combinations

        names = ['f'] + list(properties.keys())

        if self.verbose:
            info("Reweighting requested in %s case:" % self.method, self.title)
            info("Reduced potential:", potential)
            kwargs and info("Provided constants: ", kwargs)

        if not derivatives:

            try:
                y = self.compute(properties.values(), kwargs)
                properties_needed = False
            except (InputError, KeyError):
                properties_needed = True

            if (combinations):
                try:
                    func, Jac = jacobian(combinations.values(), names, kwargs)
                    jacobian_needed = False
                except InputError:
                    jacobian_needed = True

            results = list()
            for constants in cases(conditions, kwargs, self.verbose):
                u = self.compute(potential, constants)
                if properties_needed:
                    y = self.compute(properties.values(), constants)
                g, Theta = self.__reweight__(u, y, reference)
                dg = np.sqrt(np.diagonal(Theta))

                if (combinations):
                    if jacobian_needed:
                        func, Jac = jacobian(combinations.values(), names, constants)
                    h = func(g).flatten()
                    J = Jac(g)
                    dh = np.sqrt(np.diagonal(multi_dot([J, Theta, J.T])))
                    results.append(np.block([[g, h], [dg, dh]]).T.flatten())
                else:
                    results.append(np.stack([g, dg]).T.flatten())

            header = sum([[x, "d"+x] for x in names + list(combinations.keys())], [])
#             if conditions.empty:
#                 return dict(zip(header, results[0]))
#             else:
#                 return pd.concat([conditions, pd.DataFrame(results, columns=header)], 1)
            return pd.concat([conditions, pd.DataFrame(results, columns=header)], 1)

        else:

            def dec(x):
                return "__%s__" % x

            parameters = list(conditions.columns) + list(kwargs.keys())
            zyx = [(key, value[0], value[1]) for key, value in derivatives.items()]

            props = {}
            combs = {}
            for x in set(x for z, y, x in zyx):
                props[dec(x)] = derivative(potential, x, parameters)

            for z, y, x in zyx:
                if y == 'f':
                    combs[z] = dec(x)
                else:
                    dydx = derivative(properties[y], x, parameters)
                    props[dec(z)] = "%s - (%s)*(%s)" % (dydx, props[dec(x)], properties[y])
                    combs[z] = "%s + (%s)*(%s)" % (dec(z), dec(x), y)

            unwanted = sum([[x, "d"+x] for x in props.keys()], [])

            return self.reweighting(potential, dict(properties, **props), {},
                                    dict(combs, **combinations), conditions, reference,
                                    **kwargs).drop(unwanted, axis=1)

    # ======================================================================================
    def pmf(self,
            potential,
            property,
            bins=10,
            interval=None,
            **kwargs):

        self.verbose and info("PMF requested - %s case:" % self.method, self.title)
        self.verbose and info("Reduced potential:", potential)
        u = self.compute(potential, kwargs)

        z = self.compute(property, kwargs)
        if interval:
            (zmin, zmax) = interval
        else:
            zmin = min(np.amin(x[0, :]) for x in z)
            zmax = max(np.amax(x[0, :]) for x in z)
        delta = (zmax - zmin)/bins
        ibin = [np.floor((x[0:1, :] - zmin)/delta).astype(int) for x in z]

        results = list()
        for i in range(bins):
            zc = zmin + delta*(i + 0.5)
            self.verbose and info("Bin[%d]:" % (i + 1), "%s = %s" % (property, str(zc)))
            y = [np.equal(x, i).astype(np.float) for x in ibin]
            yu, Theta = self.__reweight__(u, y)
            if yu[1] > 0.0:
                dyu = np.sqrt(Theta[1, 1])
                print([zc, -np.log(yu[1]), dyu/yu[1]])
                results.append([zc, -np.log(yu[1]), dyu/yu[1]])

        return pd.DataFrame(results, columns=[property, "pmf", "d_pmf"])

    # ======================================================================================
    def histograms(self, property="u0", bins=100, **kwargs):
        if property == "u0":
            y = self.u0
        elif property == "state":
            w = np.arange(self.m) + 1
            wsum = sum(w)
            y = [wsum*np.average(p, axis=0, weights=w) for p in self.P]
        elif property == "potential":
            y = [self.u[i][i, :] for i in range(self.m)]
        else:
            y = self.compute(property, kwargs)
        ymin = min([np.amin(x) for x in y])
        ymax = max([np.amax(x) for x in y])
        delta = (ymax - ymin)/bins
        center = [ymin + delta*(i + 0.5) for i in range(bins)]
        frame = pd.DataFrame({property: center})
        for (i, s) in enumerate(self.states):
            frame[s] = np.histogram(y[i], bins, (ymin, ymax))[0]
        return frame