Commit 7c2ad477 authored by Maude Le Jeune's avatar Maude Le Jeune
Browse files

Resolve "Multiproc version of ptmcmc"

parent 234db86e
image: python:3.8
stages:
- deploy
pages:
stage: deploy
script:
- apt-get update && apt-get install --yes --no-install-recommends git python3-dev
- pip3 install --no-cache-dir numpy sphinx sphinx_rtd_theme m2r2
- git submodule init
- git submodule update
- python setup.py install
- cd doc
- sphinx-build -b html . ../public
artifacts:
paths:
- public
only:
- master
- 3-multiproc-version-of-ptmcmc
......@@ -2,9 +2,11 @@
### This is MCMC sampler
This is rather a simple sampler. It uses elements taken from other samplers. The key point of this one is
that it can/should be run in a form of several chains, some proposals use all chains to navigate through parameter space.
The main aim is not only sample the posterior but also make an efficient search/burn in
This is rather a simple sampler. It uses elements taken from other
samplers. The key point of this one is that it can/should be run as
several chains; some proposals use all chains to navigate
through parameter space. The main aim is not only to sample the
posterior but also to perform an efficient search/burn-in.
### Missing parts
......@@ -20,3 +22,8 @@ Note that I am trying to use submodules - please use:
to populate it
### Documentation
Documentation is available [here](https://stas.pages.in2p3.fr/samplermcmc)
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
API
===
Contents:
.. toctree::
:maxdepth: 2
sampler <sampler>
chain <chain>
proposal <proposal>
chain
=====
.. automodule:: MCMC_multichain.chain
:members:
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = 'samplermcmc'
copyright = '2021, S. Babak'
author = 'S. Babak'
# The full version, including alpha/beta/rc tags
release = '0.1'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.doctest',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.imgmath',
'sphinx.ext.viewcode',
'sphinx.ext.napoleon',
'm2r2'  # Markdown support: provides the mdinclude directive used by the docs
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# Documentation sources use .txt (reST) and .md (Markdown, via m2r2)
# instead of the default .rst suffix.
source_suffix = ['.txt', '.md']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Using mcmcsampler: a demo
```python
import numpy as np
import matplotlib.pyplot as plt
import MCMC_multichain.proposal as proposal
from MCMC_multichain.sampler import Sampler
```
## Define a log-likelihood function
Here we take a simple multi dimensional Gaussian.
We use a ```class``` to easily store all the meta data needed to evaluate the log-likelihood
```python
class TestLogLik:
""" A multi dimensional Gaussian
"""
def __init__(self, ndim):
""" Init number of dimensions.
"""
self.ndim = ndim
self.param_dic = [f"p{i}" for i in range(ndim)]
means = np.random.rand(ndim)
cov = 0.5 - np.random.rand(ndim ** 2).reshape((ndim, ndim))
cov = np.triu(cov)
cov += cov.T - np.diag(cov.diagonal())
cov = np.dot(cov, cov)
self.mu = means
self.cov = cov
def loglik(self, x):
""" Return log-likelihood for a given point x.
"""
diff = x - self.mu
return -0.5 * np.dot(diff, np.linalg.solve(self.cov, diff))
def logpi(self, p):
""" Return log-prior for a given point x.
"""
return -7.0
```
## Define the sampler
```python
# Parallel tempering parameters
Tmax = 10
Nchains = 5
# Define likelihood, priors and starting point
ndim = 3
T = TestLogLik(ndim)
priors = np.array([-3,3]*T.ndim).reshape(T.ndim,2)
x0 = [np.random.randn(T.ndim) for n in range(Nchains)]
S = Sampler(Nchains, priors, T.loglik, T.logpi, T.param_dic, Tmax=Tmax)
S.set_starting_point(x0)
```
INFO:root:Tmax=10, range of temperature: [ 1. 1.77827941 3.16227766 5.62341325 10. ]
## Define the proposals
```python
# Define proposals
SL = proposal.Slice(T.param_dic).slice
SC = proposal.SCAM(T.param_dic).SCAM
p_dict = [{SL:40, SC:70}]*Nchains
S.set_proposals(p_dict)
```
## Run the sampler
```python
# Run mcmc
niter = 1000
c = S.run_mcmc(niter, pSwap=0.95, printN=2000, multiproc=False)
```
INFO:root:iter 0
INFO:root:chain 0
INFO:root:current loglik: -4.7, best: -0.1, temp: 1.0
INFO:root:chain 1
INFO:root:current loglik: -4.5, best: -0.0, temp: 1.8
INFO:root:chain 2
INFO:root:current loglik: -4.7, best: -0.0, temp: 3.2
INFO:root:chain 3
INFO:root:current loglik: -4.5, best: -0.0, temp: 5.6
INFO:root:chain 4
INFO:root:current loglik: -4.7, best: -0.0, temp: 10.0
## Show posterior for 1st parameter
```python
# Plot distribution of first parameter
C0 = np.array(S.chains[0].data["chn"])
plt.figure()
plt.hist(C0[:,0], bins=100, color="k", histtype="step")
plt.axvline(x=T.mu[0], label='True')
plt.legend()
```
<matplotlib.legend.Legend at 0x7f25af3e68b0>
![png](demo_11_1.png)
.. mdinclude:: demo.md
Welcome to samplermcmc's documentation!
=======================================
Contents:
.. toctree::
:maxdepth: 2
readme <readme>
demo <demo>
API <api>
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
proposal
========
.. automodule:: MCMC_multichain.proposal
:members:
.. mdinclude:: ../README.md
sampler
=======
.. automodule:: MCMC_multichain.sampler
:members:
:undoc-members:
......@@ -22,7 +22,7 @@ setup(name='MCMC_multichain',
license='GNU',
packages=['MCMC_multichain'],
package_dir={'MCMC_multichain': 'src'},
install_requires=['numpy', 'scipy', 'acor'],
install_requires=['numpy', 'scipy', 'acor', 'sklearn'],
include_package_data=True,
zip_safe=False
)
"""
samplermcmc.chain
"""
import copy
import acor._acor as acor
import numpy as np
import multiprocessing.managers
from sklearn.mixture import BayesianGaussianMixture as BGM
import sys
import multiprocessing
ACOR_MAXLAG = 10
class ChainData:
    """Container for the per-chain state that can be shared across processes."""

    def __init__(self, beta=1, profiling=False):
        """Initialize the shared fields.

        Note: beta and profiling are currently accepted but not stored here.
        """
        self.Mx = 0  # current model point
        self.Lx = 0  # log-likelihood of the current point
        self.Px = 0  # log-posterior of the current point

    def to_dict(self):
        """Return the attributes as a plain dict suitable for sharing across processes."""
        return dict(vars(self))
class Chain:
""" A tempered chain and its associated proposals.
"""
def __init__(self, beta, dbeta, log_l, log_p, logger=None, profiling=False):
    """Set up an empty tempered chain.

    Args:
        beta: inverse temperature of this chain.
        dbeta: beta - beta1 (beta difference with the n+1 chain).
        log_l: log-likelihood callable.
        log_p: log-prior callable.
        logger: optional logger instance.
        profiling: if True, keep per-proposal jump/acceptance counters.
    """
    # Shared state container (may later be swapped for a managed dict,
    # see share_data); must exist before Px/Lx are written below.
    self.data = ChainData(beta, profiling).to_dict()
    # Running maxima of log-likelihood and log-posterior.
    self.maxL = -np.inf
    self.maxP = -np.inf
    # Accumulated samples with their log-likelihood / log-posterior values.
    self.chn = []
    self.logL = []
    self.logP = []
    self.modes = []
    # Proposal covariance and its SVD factors (filled by init_cov).
    self.cov = 0
    self.U = 0
    self.S = 0
    # Current-point log-posterior / log-likelihood.
    self.Px = 0
    self.Lx = 0
    # Tempering parameters.
    self.beta = beta
    self.dbeta1 = dbeta  # beta - beta1 (of the n+1 chain)
    # Swap statistics; start at 1 so get_ratio never divides by zero.
    self.swap_accepted = 1
    self.swap_proposed = 1
    # Proposals and annealing factor.
    self.props = {}
    self.anneal = 1
    self.log_l = log_l
    self.log_p = log_p
    self.logger = logger
    self.profiling = profiling
    if profiling:
        self.jumps = {}  # per-proposal number of attempted jumps
        self.ar = {}     # per-proposal number of accepted jumps
    self.shared = False
def init_cov(self, cov=None):
    """Set the proposal covariance matrix and cache its SVD factors.

    Args:
        cov: optional covariance matrix; when None, a diagonal matrix
            with per-parameter standard deviation 0.01 is used.
    """
    if cov is None:
        cov = (0.01 ** 2) * np.eye(len(self.Mx))
    self.cov = cov
    self.U, self.S, _ = np.linalg.svd(cov)
def share_data(self, manager):
    """Replace the plain data dict with a manager-backed proxy dict.

    Copies every current entry into a multiprocessing manager dict so
    that worker processes observe updates to the chain state.
    """
    proxy = manager.dict()
    proxy.update(self.data)
    self.data = proxy
    self.shared = True
def add_current(self):
    """ Add current point Mx to the list of accumulated points.

    Re-appends the current state (Mx, Lx, Px), i.e. the chain stays at
    the same point for this iteration (used on rejection).
    """
    # NOTE(review): the recomputation of Px/Lx below is disabled ('if 0').
    # It was presumably meant to refresh the cached values when data is
    # shared across processes (see the commented-out 'self.shared') —
    # confirm intent before re-enabling.
    if 0:#self.shared:
        self.Px = self.log_p(self.Mx)
        self.Lx = self.log_l(self.Mx)
    self.chn.append(self.Mx)
    self.logL.append(self.Lx)
    self.logP.append(self.Px)
def add(self, p, lik, prior, is_max=False):
    """Record a newly accepted point and refresh the current state.

    Args:
        p: accepted model point.
        lik: log-likelihood of p.
        prior: log-prior of p.
        is_max: when True, also update the running maxima maxL/maxP.
    """
    post = lik + prior
    self.chn.append(p)
    self.logL.append(lik)
    self.logP.append(post)
    self.Mx = p
    self.Lx = lik
    self.Px = post
    if is_max:
        self.maxL = lik
        self.maxP = post
# @property
# def beta(self):
# """ Inverse temperature.
# """
# return self.data['beta']
# @beta.setter
# def beta(self, value):
# self.data['beta'] = value
@property
def Mx(self):
    """ Current model point (stored in the shared data dict).
    """
    return self.data['Mx']
@Mx.setter
def Mx(self, value):
    self.data['Mx'] = value
@property
def Lx(self):
    """ Log-likelihood of the current model point Mx.
    """
    return self.data['Lx']
@Lx.setter
def Lx(self, value):
    self.data['Lx'] = value
@property
def Px(self):
    """ Log-posterior of the current model point Mx.
    """
    return self.data['Px']
@Px.setter
def Px(self, value):
    self.data['Px'] = value
# @property
# def swap(self):
# """ Log-likelihood for current model point Mx.
# """
# return self.data['swap']
def set_proposals(self, props):
    """ Set the proposals and normalize their weights.

    Args:
        props: dict mapping proposal callables (bound methods) to
            relative weights; weights are normalized to sum to one.

    A normalized copy is stored — unlike before, the caller's dict is
    no longer mutated in place.
    """
    wtot = float(sum(props.values()))
    self.props = {k: v / wtot for k, v in props.items()}
    if self.profiling:
        # One jump/acceptance counter per proposal, keyed by its name.
        for prop in self.props:
            name = prop.__self__.name
            self.jumps[name] = 0
            self.ar[name] = 0
def choose_proposal(self):
    """ Randomly pick a proposal according to the normalized weights.

    Returns:
        The chosen proposal callable.
    """
    kys = list(self.props.keys())
    weights = np.array(list(self.props.values()))
    i_p = np.random.choice(len(kys), p=weights)
    # Reuse kys instead of rebuilding the key list a second time.
    p = kys[i_p]
    if self.profiling:
        self.jumps[p.__self__.name] += 1
    return p
def get_size(self):
    """Return the number of accumulated samples in the chain."""
    return len(self.chn)
def get_sample(self, i):
    """Return the accumulated sample at index i."""
    return self.chn[i]
def get_ratio(self):
    """Return the chain-swap acceptance ratio (accepted / proposed)."""
    return self.swap_accepted / self.swap_proposed
def swapped(self):
    """Record that a proposed chain swap was accepted."""
    self.swap_accepted = self.swap_accepted + 1
def full_step(self, ims, chains=None):
    """ Advance the chain by one proposal move.

    Args:
        ims: iteration/chain index (unused here; kept for interface).
        chains: optional list of all chains, forwarded to multi-chain
            proposals.

    Returns:
        Tuple (Mx, Lx, Px) of the (possibly updated) current state.
    """
    # Draw a proposal and generate a candidate point.
    prop = self.choose_proposal()
    kprop = prop.__self__.name
    y, status, Ly = prop(self.Mx, chain=self, chains=chains)
    if Ly is None:
        Ly = self.log_l(y)
    if status == 1:
        # Slice-like proposal: accept unconditionally.
        self.add(y, Ly, self.log_p(y), is_max=(Ly > self.maxL))
        # BUGFIX: self.ar only exists when profiling is enabled; this
        # increment previously ran unconditionally and crashed otherwise.
        if self.profiling:
            self.ar[kprop] += 1
    elif status == 0:
        # Standard Metropolis-Hastings accept/reject.
        accepted = self.MH_step(y, Ly, 0)
        if self.profiling and accepted:
            self.ar[kprop] += 1
    elif status == -1 and self.profiling:
        # Move not really attempted (e.g. below threshold): undo jump count.
        self.jumps[kprop] -= 1
    return self.Mx, self.Lx, self.Px
def MH_step(self, y, Ly, qxy):
    """ Metropolis-Hastings accept/reject for a candidate point.

    Args:
        y: candidate model point.
        Ly: log-likelihood of y.
        qxy: log proposal-density correction (Hastings ratio term).

    Returns:
        True if y was accepted, False otherwise. (A non-finite Ly now
        returns False explicitly instead of None; both are falsy, so
        callers checking truthiness are unaffected.)
    """
    if not np.isfinite(Ly):
        # Reject outright; the chain stays at the current point.
        self.add_current()
        return False
    x = self.Mx
    pi_y = self.log_p(y)
    # Tempered MH log-ratio: likelihood part scaled by beta, priors untempered.
    log_MH = (Ly - self.Lx) * self.beta + qxy + pi_y - self.log_p(x)
    if log_MH > np.log(np.random.random()):
        self.add(y, Ly, pi_y, is_max=(Ly > self.maxL))
        return True
    self.add_current()
    return False
def update_cov(self):
    """ Update the covariance matrix and its SVD from accumulated samples.

    Thins the later part of the chain by the smallest autocorrelation
    length across dimensions, then recomputes the sample covariance and
    caches its SVD factors.
    """
    xs = np.copy(self.chn)
    # NOTE(review): the original comment said "skip first 25% of the chain",
    # but int(0.75*N) as a start index keeps only the LAST 25% — confirm
    # which was intended before changing the factor.
    SzChn = int(0.75 * np.shape(xs)[0])
    xs = xs[SzChn:, :]
    al, mean, std = acor.acor(xs[:, 0], ACOR_MAXLAG)
    for jj in range(1, np.shape(xs)[1]):
        ac = 10000
        try:
            ac = acor.acor(xs[:, jj], ACOR_MAXLAG)[0]
        except Exception:
            # BUGFIX: message was a plain string missing the f-prefix, so
            # jj and the data were never interpolated into the warning.
            self.logger.warning(f'failed acor {jj}, {xs[:, jj]}')
        if ac < al:
            al = ac
    if al > 100:
        # If autocorrelation > 100, draw a random lag in [10, 100) so
        # enough points survive the thinning.
        al = np.random.uniform(10., 100.)
    if al < 1.0:
        al = 1
    x_tot = xs[::int(al)]
    cov = np.cov(x_tot.T)
    self.cov = cov
    self.U, self.S, v = np.linalg.svd(cov)