Source code for mics.mixtures

"""
.. module:: mixtures
   :platform: Unix, Windows
   :synopsis: a module for defining the class :class:`Mixture`.

.. moduleauthor:: Charlles R. A. Abreu <abreu@eq.ufrj.br>

"""

import numpy as np
import pandas as pd

import mics
from mics.funcs import deltaMethod
from mics.funcs import diff
from mics.funcs import func
from mics.utils import InputError
from mics.utils import bennett
from mics.utils import cases
from mics.utils import crypto
from mics.utils import errorTitle
from mics.utils import info
from mics.utils import multimap
from mics.utils import propertyDict
from mics.utils import stdError


[docs]class mixture: """ A mixture of independently collected samples (MICS). Parameters ---------- samples : :class:`pooledsample` or list(:class:`sample`) A list of samples. engine : :class:`MICS` or :class:`MBAR` A method for mixture-model analysis. """ def __init__(self, samples, engine): self.samples = samples self.engine = engine m = self.m = len(samples) if mics.verbose: # np.set_printoptions(precision=4, threshold=15, edgeitems=4, suppress=True) info("\n=== Setting up mixture ===") info("Analysis method: ", self.engine.__class__.__name__) info("Number of samples:", m) if m == 0: raise InputError("list of samples is empty") self.n = np.array([len(sample.dataset) for sample in samples]) self.neff = np.array([sample.neff for sample in samples]) names = self.names = list(samples[0].dataset.columns) if mics.verbose: info("Sample sizes:", self.n) info("Effective sample sizes:", self.neff) info("Properties:", ", ".join(names)) potentials = [sample.potential.lambdify() for sample in samples] self.u = [multimap(potentials, sample.dataset) for sample in samples] self.f = bennett(self.u) mics.verbose and info("Initial free-energy guess:", self.f) self.engine.__initialize__(self) # ====================================================================================== def __compute__(self, functions, constants): try: if isinstance(functions, str): funcs = [func(functions, self.names, constants).lambdify()] else: funcs = [func(f, self.names, constants).lambdify() for f in functions] return [multimap(funcs, sample.dataset) for sample in self.samples] except (InputError, KeyError): return None # ======================================================================================
[docs] def free_energies(self, reference=0): """ Computes the free energies of all sampled states relative to a given reference state, as well as their standard errors. Parameters ---------- reference : int, optional, default=0 Specifies which sampled state will be considered as a reference for computing free-energy differences. Returns ------- pandas.DataFrame A data frame containing the free-energy differences and their computed standard errors for all sampled states. """ frame = self.samples.__qualifiers__() frame["f"] = self.f - self.f[reference] T = self.Theta frame["df"] = np.sqrt(np.diag(T) - 2*T[:, reference] + T[reference, reference]) return frame
# ======================================================================================
[docs] def reweighting(self, potential, properties={}, derivatives={}, combinations={}, conditions={}, reference=0, **constants): """ Computes averages of specified properties at target states defined by a given reduced `potential` function with distinct passed parameter values, as well as the free energies of such states with respect to a sampled `reference` state. Also, computes derivatives of these averages and free energies with respect to the mentioned parameters. In addition, evaluates combinations of free energies, averages, and derivatives. In all cases, uncertainty propagation is handled automatically by means of the delta method. Parameters ---------- potential : str A mathematical expression defining the reduced potential of the target states. It might depend on the collective variables of the mixture samples, as well as on external parameters whose values will be passed via `conditions` or `constants`, such as explained below. properties : dict(str: str), optional, default={} A dictionary associating names to mathematical expressions, thus defining a set of properties whose averages must be evaluated at the target states. If it is omitted, then only the relative free energies of the target states will be evaluated. The expressions might depend on the same collective variables and parameters mentioned above for `potential`. derivatives : dict(str: (str, str)), optional, default={} A dictionary associating names to (property, parameter) pairs, thus specifying derivatives of average properties at the target states or relative free energies of these states with respect to external parameters. For each pair, property must be either "f" (for free energy) or a name defined in `properties`, while parameter must be an external parameter such as described above for `potential`. combinations : dict(str: str), optional, default={} A dictionary associating names to mathematical expressions, thus defining combinations among average properties at the target states, the relative free energies of these states, and their derivatives with respect to external parameters. The expressions might depend on "f" (for free energy) or on the names defined in `properties`, as well as on external parameters such as described above for `potential`. conditions : pandas.DataFrame or dict, optional, default={} A data frame whose column names are external parameters present in mathematical expressions specified in arguments `potential`, `properties`, and `combinations`. The rows of the data frame contain sets of values of these parameters, in such as way that the reweighting is carried out for every single set. This is a way of defining multiple target states from a single `potential` expression. The same information can be passed as a dictionary associating names to lists of numerical values, provided that all lists are equally sized. If it is empty, then a unique target state will be considered and all external parameters in `potential`, if any, must be passed as keyword arguments. reference : int, optional, default=0 The index of a sampled state to be considered as a reference for computing relative free energies. **constants : keyword arguments A set of keyword arguments passed as name=value, aimed to define external parameter values for the evaluation of mathematical expressions. These values will be repeated at all target states specified via `potential` and `conditions`. Returns ------- pandas.DataFrame A data frame containing the computed quantities, along with their estimated uncertainties, at all target states specified via `potential` and `conditions`. """ if mics.verbose: info("\n=== Performing reweighting with %s ===" % self.engine.__class__.__name__) info("Reduced potential:", potential) constants and info("Provided constants: ", constants) freeEnergy = "f" if freeEnergy in properties.keys(): raise InputError("Word % is reserved for free energies" % freeEnergy) condframe = pd.DataFrame(data=conditions) if isinstance(conditions, dict) else conditions propfuncs = list(properties.values()) if not derivatives: propnames = [freeEnergy] + list(properties.keys()) combs = combinations.values() gProps = self.__compute__(propfuncs, constants) if combinations: gDelta = deltaMethod(combs, propnames, constants) results = list() for (index, condition) in cases(condframe): mics.verbose and condition and info("Condition[%s]" % index, condition) consts = dict(condition, **constants) u = self.__compute__(potential, consts) y = gProps if gProps else self.__compute__(propfuncs, consts) (yu, Theta) = self.engine.__reweight__(self, u, y, reference) result = propertyDict(propnames, yu, stdError(Theta)) if combinations: delta = gDelta if gDelta.valid else deltaMethod(combs, propnames, consts) (h, dh) = delta.evaluate(yu, Theta) result.update(propertyDict(combinations.keys(), h, dh)) results.append(result.to_frame(index)) return condframe.join(pd.concat(results)) else: symbols = list(condframe.columns) + list(constants.keys()) parameters = set(x for (y, x) in derivatives.values()) props = dict() for x in parameters: props[crypto(x)] = diff(potential, x, symbols) combs = dict() for (z, (y, x)) in derivatives.items(): if y == freeEnergy: combs[z] = crypto(x) else: dydx = diff(properties[y], x, symbols) props[crypto(z)] = "%s - (%s)*(%s)" % (dydx, props[crypto(x)], properties[y]) combs[z] = "%s + (%s)*(%s)" % (crypto(z), crypto(x), y) unwanted = sum([[x, errorTitle(x)] for x in props.keys()], []) return self.reweighting(potential, dict(properties, **props), {}, dict(combs, **combinations), condframe, reference, **constants).drop(unwanted, axis=1)
# ====================================================================================== def pmf(self, potential, property, bins=10, interval=None, **constants): if mics.verbose: info("\n=== Computing PMF with %s ===" % self.engine.__class__.__name__) info("Reduced potential:", potential) u = self.__compute__(potential, constants) z = self.__compute__(property, constants) if interval: (zmin, zmax) = interval else: zmin = min(np.amin(x[0, :]) for x in z) zmax = max(np.amax(x[0, :]) for x in z) delta = (zmax - zmin)/bins ibin = [np.floor((x[0:1, :] - zmin)/delta).astype(int) for x in z] results = list() for i in range(bins): zc = zmin + delta*(i + 0.5) mics.verbose and info("Bin[%d]:" % (i + 1), "%s = %s" % (property, str(zc))) y = [np.equal(x, i).astype(np.float) for x in ibin] (yu, Theta) = self.engine.__reweight__(self, u, y) if yu[1] > 0.0: dyu = np.sqrt(max(0.0, Theta[1, 1])) results.append([zc, -np.log(yu[1]), dyu/yu[1]]) return pd.DataFrame(results, columns=[property, "pmf", errorTitle("pmf")]) # ====================================================================================== def histograms(self, property="u0", bins=100, **constants): if property == "u0": y = self.u0 elif property == "state": w = np.arange(self.m) + 1 wsum = sum(w) y = [wsum*np.average(p, axis=0, weights=w) for p in self.P] elif property == "potential": y = [self.u[i][i, :] for i in range(self.m)] else: y = self.__compute__(property, constants) ymin = min([np.amin(x) for x in y]) ymax = max([np.amax(x) for x in y]) delta = (ymax - ymin)/bins center = [ymin + delta*(i + 0.5) for i in range(bins)] frame = pd.DataFrame({property: center}) for i in range(self.m): frame["state %s" % (i+1)] = np.histogram(y[i], bins, (ymin, ymax))[0] return frame