Commit 1acee541 authored by Matthieu Tristram's avatar Matthieu Tristram
Browse files

Change chain format to dict

Update all routines to match new format
Add convergence routines
parent dea1e9ba
......@@ -15,63 +15,26 @@ def cov2cor( mat):
return(cor)
def readCosmoMC( filename, nchain=4):
cosmopars = list(loadtxt( "%s.paramnames" % (filename), dtype='str', unpack=True, usecols=[0]))
def extract_bestfit( dirname):
parlist = ['iter','chi2']+[from_cosmomcName.setdefault(p,p) for p in cosmopars]
f = open( dirname, 'r')
param = array( f.readline().replace('\n','').split('\t'), dtype=str)
value = array( f.readline().replace('\t',' ').split(' '), dtype=double)
f.close()
return( param,value)
# bestfit = dict(zip(param, value))
# return( bestfit)
data = extract_chains( filename+"_", nchain)
chain = {}
for p in range(len(parlist)):
chain[parlist[p]] = concatenate( [tmp[p,:] for tmp in data])
def extract_hess( filename, ext="MnUserCovariance", reverse=False):
f = open( filename, 'r')
lines=f.readlines()
f.close()
return( chain)
npar = 0
par = []
for l in lines:
if l.startswith('par\t'):
npar = npar + 1
par.append( l.split("\t")[1])
print( "number of parameter : %d/%d" % (npar,len(par)))
def mergeMC( filename, burnin=0.9, num=(1,2,3,4), ext="txt", par=None):
if reverse:
lines.reverse()
index = 0
while not lines[index].startswith(ext):
index = index+1
print(index)
while lines[index] == "\n" or lines[index] == "":
index = index+1
if par == None:
f = open( "%s%d.%s" % (filename,num[0],ext))
par = [v for v in f.readline().replace("\n","").split("\t") if v != ""]
cov=[]
for n in range(npar):
cov.append([v for v in lines[index+2+n].replace("\n","").split(" ") if v != ""])
n = len(cov[0])
mat = zeros( (n,n))
for i in range(n):
mat[i,:] = array(cov[i])
if reverse:
mat = flipud(mat)
return(mat,par)
def mergeMC( filename, burnin=0.9, num=(1,2,3,4), ext="txt"):
for n in range(0,len(num)):
tmp = loadtxt( "%s%d.%s" % (filename,num[n],ext), skiprows=1)
......@@ -85,10 +48,11 @@ def mergeMC( filename, burnin=0.9, num=(1,2,3,4), ext="txt"):
else:
chain = concatenate((chain,tmp[ist:,:]),axis=0)
f = open( "%s%d.%s" % (filename,num[0],ext))
par = [v for v in f.readline().replace("\n","").split("\t") if v != ""]
return(chain,par)
dchain = {}
for i in range(len(par)):
dchain[par[i]] = chain[:,i]
return(dchain)
def scan(filename):
......@@ -140,6 +104,7 @@ def extract_chains( chainame, nchain, begin=0, end=-1):
imin = begin
imax = end
chain = []
for c in range(0,nchain):
print( "%s%d.txt" % (chainame,c+1))
tmp = loadtxt( "%s%d.txt" % (chainame,c+1), skiprows=1)
......@@ -149,129 +114,89 @@ def extract_chains( chainame, nchain, begin=0, end=-1):
if imax < 0:
imax = nsamples
nele = (imax-imin)
if c == 0:
chain = zeros( (nchain, npar, nele))
chain[c,:,:] = tmp[imin:imax,:].T
f = open( "%s%d.txt" % (chainame,1))
par = [v for v in f.readline().replace("\n","").split("\t") if v != ""]
chain.append( tmp[imin:imax,:].T)
return(chain,par)
return(chain)
#GR test
def GR( chainame, gap=10000, length_max=-1):
nchain=4
length_min = 50
chain = extract_chains( chainame, nchain)
bla,npar,nsamples = shape(chain)
if length_max == -1:
length_max = nsamples
nele = (length_max-length_min)/gap+1
index_event = 0
R = zeros( (nele, npar))
for index in range(length_min,length_max,gap):
burnin = index/2
mchain = mean( chain[:,:,burnin:index],2)
schain = var( chain[:,:,burnin:index],2)
B_n = var( mchain,0)
W = mean( schain,0)
R[index_event,:] = ( (index-1.)/index*W + B_n*(1.+1./nchain) )/W
index_event = index_event + 1
return(R)
def hist2d(x, y, *args, **kwargs):
"""
Plot a 2-D histogram of samples.
"""
import scipy.stats
import numpy as np
import matplotlib.pyplot as pl
import matplotlib.cm as cm
ax = kwargs.pop("ax", pl.gca())
########################################################################
#CONVERGENCE
########################################################################
extent = kwargs.pop("extent", [[x.min(), x.max()], [y.min(), y.max()]])
bins = kwargs.pop("bins", 50)
color = kwargs.pop("color", "k")
alpha = kwargs.pop("alpha", 0.8)
linewidths = kwargs.pop("linewidths", None)
fill = kwargs.get("fill", True)
datapoints = kwargs.get("datapoints", False)
contours = kwargs.get("contours", True)
cmap = cm.get_cmap(kwargs.get("cmap", "gray"))
sigmas = kwargs.get("sigmas", [1,2,3])
#GR test
def GelmanRubin( chains, gap=10000, length_max=None):
length_min = 1000
X = np.linspace(extent[0][0], extent[0][1], bins + 1)
Y = np.linspace(extent[1][0], extent[1][1], bins + 1)
try:
H, X, Y = np.histogram2d(x.flatten(), y.flatten(), bins=(X, Y),
weights=kwargs.get('weights', None))
except ValueError:
raise ValueError("It looks like at least one of your sample columns "
"have no dynamic range. You could try using the "
"`extent` argument.")
gkde = scipy.stats.gaussian_kde([x, y])
dx = (extent[0][1]-extent[0][0])/(bins+1.)
dy = (extent[1][1]-extent[1][0])/(bins+1.)
# chains = extract_chains( chainame, nchain)
nchain = len(chains)
x2,y2 = np.mgrid[extent[0][0]:extent[0][1]:dx, extent[1][0]:extent[1][1]:dy]
z = np.array(gkde.evaluate([x2.flatten(),y2.flatten()])).reshape(x2.shape)
nsamples = min( [len(c[0,:]) for c in chains])
if length_max==None:
length_max = nsamples
W = (np.exp(-0.5 * np.array(sigmas) ** 2))*z.max()
W = np.append(W[::-1],z.max())
if datapoints:
ax.plot(x, y, "o", color=color, ms=1.5, zorder=-1, alpha=0.1, rasterized=True)
if length_max > nsamples:
print( "Not enough samples...")
exit()
if fill:
ax.contourf(x2, y2, z, W, alpha=alpha, cmap=cmap, antialiased=True)
if contours:
ax.contour(x2, y2, z, W[:np.size(sigmas)-1],linewidths=linewidths, colors=color, antialiased=False)
data = np.vstack([x, y])
mu = np.mean(data, axis=1)
cov = np.cov(data)
if kwargs.pop("plot_ellipse", False):
error_ellipse(mu, cov, ax=ax, edgecolor="r", ls="dashed")
ax.set_xlim(extent[0])
ax.set_ylim(extent[1])
it = range(length_min,length_max,gap)
R = []
for index in it:
# n = index - length_min
# tmp = [c[:,length_min+n/2:index] for c in chains]
n = length_max - index
tmp = [c[:,index+n/2:length_max] for c in chains]
#within Chain
mchain = mean( tmp,2)
schain = var( tmp,2)
#between chains
W = mean( schain,0)
B = var( mchain,0)
#sqrt ?
R.append( ( (index-1.)/index*W + B*(1.+1./nchain) )/W )
return(it,R)
def ctr_level(histogram2d, lvl, infinite=False):
"""
Extract the contours for the 2d plots (Karim Benabed)
"""
import numpy as np
def Geweke( chains, gap=10000, length_max=None):
length_min = 1000
nchain = len(chains)
nsamples = min( [len(c[0,:]) for c in chains])
if length_max==None:
length_max = nsamples
hist = histogram2d.flatten()*1.
hist.sort()
cum_hist = np.cumsum(hist[::-1])
cum_hist /= cum_hist[-1]
if length_max > nsamples:
print( "Not enough samples...")
exit()
it = range(length_min,length_max,gap)
T = []
for index in it:
n = length_max-index
tmp1 = [c[:,index:index+n*0.1] for c in chains]
tmp2 = [c[:,index+n*0.5:length_max] for c in chains]
#Tscore
Z1 = mean( tmp1,2)
Z2 = mean( tmp2,2)
s = sqrt( var(tmp1,2)/(n*0.1) + var(tmp2,2)/(n*0.5) )
T.append( (Z1-Z2)/s)
return(it,T)
########################################################################
alvl = np.searchsorted(cum_hist, lvl)
clist = hist[-alvl]
return clist
########################################################################
#PARAMETER NAMES
########################################################################
parname = {
'omega_b' : '$\\Omega_\mathrm{b}$',
......@@ -343,7 +268,7 @@ parname = {
}
cosmomcName = {
to_cosmomcName = {
'omega_b' : 'omegabh2',
'omega_cdm' : 'omegach2',
'100*theta_s' : 'theta',
......@@ -380,80 +305,93 @@ cosmomcName = {
'Aps143x217' : 'Aps143x217',
}
from_cosmomcName = dict(zip(to_cosmomcName.values(),to_cosmomcName.keys()))
########################################################################
def triangle( chains, nbin=50, params='', name='', npar=6, color=''):
#PLOTS
########################################################################
def triangle( chains, params, nbin=50, parnames=None, name='', npar=6, color=''):
import matplotlib.pyplot as plt
import scipy.ndimage as nd
import matplotlib.ticker as mtick
fig=plt.figure(figsize=(14,12))
plt.subplots_adjust(hspace=0.001,wspace=0.001)
tmp = concatenate( chains)
nchain = len(chains)
if len(color) < nchain:
ax = plt.gca()
color_cycle = ax._get_lines.color_cycle
color = [next(color_cycle) for i in range(nchain)]
if parnames == None:
parnames = [parname.setdefault(p,p) for p in params]
npar = len(params)
npar = min( [npar,len(params)-1])
rge = {}
for par in params:
tmp = []
for chain in chains:
tmp = tmp+list(chain[par])
rge[par] = list(mean(tmp) + dot(std(tmp), [-4,4]))
for xpar in range(1,npar+1):
ax=plt.subplot( npar, npar, (xpar-1)*npar+xpar)
xrge = list(mean(tmp[:,xpar]) + dot(std(tmp[:,xpar]), [-4,4]))
for par in params:
xpar = params.index(par)
ax=plt.subplot( npar, npar, xpar*npar+xpar+1)
for c in range(nchain):
chain = chains[c]
h,X=histogram( chain[:,xpar], bins=nbin, range=xrge, normed=True)
h,X=histogram( chain[par], bins=nbin, range=rge[par], normed=True)
plt.plot( (X[1:]+X[:-1])/2., nd.gaussian_filter( h, 1), color=color[c])
ax.set_xlim( xrge)
ax.set_xlim( rge[par])
ax.yaxis.set_visible(False)
ax.xaxis.set_visible(xpar==npar)
ax.xaxis.set_visible(xpar==(npar-1))
#ax.locator_params( nbins=npar-1)
ax.tick_params(axis='both', which='major', labelsize=6)
ax.tick_params(axis='both', which='minor', labelsize=5)
if 'PS' in parname.setdefault(params[xpar],params[xpar]):
if 'PS' in parname.setdefault(parnames[xpar],parnames[xpar]):
ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.0e'))
#ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e'))
ax.locator_params(nbins=4)
ax.locator_params(tight=True, nbins=4)
for ypar in range(xpar-1,0,-1):
ax=plt.subplot( npar, npar, (xpar-1)*npar+ypar)
for ypar in range(xpar)[::-1]:
par2 = params[ypar]
ax=plt.subplot( npar, npar, xpar*npar+ypar+1)
yrge = list(mean(tmp[:,ypar]) + dot(std(tmp[:,ypar]), [-4,4]))
for c in range(nchain):
chain = chains[c]
hmap,binX,binY = histogram2d( chain[:,xpar], chain[:,ypar], bins=nbin, range=[xrge,yrge])
hmap,binX,binY = histogram2d( chain[par], chain[par2], bins=nbin, range=[rge[par],rge[par2]])
Y,X = meshgrid((binX[1:]+binX[:-1])/2.,(binY[1:]+binY[:-1])/2.)
plt.contour( X,Y, nd.gaussian_filter(hmap.T,2), levels=ctr_level( hmap, [0.68,0.95]), colors=color[c], extent=tuple(xrge+yrge))
plt.contour( X,Y, nd.gaussian_filter(hmap.T,2), levels=ctr_level( hmap, [0.68,0.95]), colors=color[c], extent=tuple(rge[par]+rge[par2]))
ax.locator_params( nbins=npar-1)
ax.ticklabel_format(useOffset=False)
ax.tick_params(axis='both', which='major', labelsize=6)
ax.tick_params(axis='both', which='minor', labelsize=5)
ax.yaxis.set_visible(ypar==1)
ax.xaxis.set_visible(xpar==npar)
ax.locator_params(nbins=4)
if 'PS' in parname.setdefault(params[ypar],params[ypar]):
ax.yaxis.set_visible(ypar==0)
ax.xaxis.set_visible(xpar==(npar-1))
ax.locator_params(tight=True, nbins=4)
if 'PS' in parname.setdefault(parnames[ypar],parnames[ypar]):
ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.0e'))
#plot parameter names
if params != '' and len(params)>npar:
for xpar in range(1,npar+1):
plt.subplot( npar, npar, (xpar-1)*npar+1)
plt.ylabel(parname.setdefault(params[xpar],params[xpar]))
plt.subplot( npar, npar, (npar-1)*npar+xpar)
plt.xlabel(parname.setdefault(params[xpar],params[xpar]))
plt.tick_params(axis='both', which='major', labelsize=6)
plt.tick_params(axis='both', which='minor', labelsize=5)
plt.locator_params(bins=4)
#print parname.setdefault(params[xpar])
for xpar in range(npar):
plt.subplot( npar, npar, xpar*npar+1)
plt.ylabel(parname.setdefault(parnames[xpar],parnames[xpar]))
plt.subplot( npar, npar, (npar-1)*npar+xpar+1)
plt.xlabel(parname.setdefault(parnames[xpar],parnames[xpar]))
plt.tick_params(axis='both', which='major', labelsize=6)
plt.tick_params(axis='both', which='minor', labelsize=5)
plt.locator_params(tight=True, nbins=4)
#print parname.setdefault(params[xpar])
#write legend
if len(name) == nchain:
ax=plt.subplot( npar, npar, int(0.25*npar)*npar+npar-1)
......@@ -469,21 +407,168 @@ def triangle( chains, nbin=50, params='', name='', npar=6, color=''):
def hist2d(x, y, *args, **kwargs):
"""
Plot a 2-D histogram of samples.
"""
import scipy.stats
import numpy as np
import matplotlib.pyplot as pl
import matplotlib.cm as cm
ax = kwargs.pop("ax", pl.gca())
extent = kwargs.pop("extent", [[x.min(), x.max()], [y.min(), y.max()]])
bins = kwargs.pop("bins", 50)
color = kwargs.pop("color", "k")
alpha = kwargs.pop("alpha", 0.8)
linewidths = kwargs.pop("linewidths", None)
fill = kwargs.get("fill", True)
datapoints = kwargs.get("datapoints", False)
contours = kwargs.get("contours", True)
cmap = cm.get_cmap(kwargs.get("cmap", "gray"))
sigmas = kwargs.get("sigmas", [1,2,3])
X = np.linspace(extent[0][0], extent[0][1], bins + 1)
Y = np.linspace(extent[1][0], extent[1][1], bins + 1)
try:
H, X, Y = np.histogram2d(x.flatten(), y.flatten(), bins=(X, Y),
weights=kwargs.get('weights', None))
except ValueError:
raise ValueError("It looks like at least one of your sample columns "
"have no dynamic range. You could try using the "
"`extent` argument.")
gkde = scipy.stats.gaussian_kde([x, y])
dx = (extent[0][1]-extent[0][0])/(bins+1.)
dy = (extent[1][1]-extent[1][0])/(bins+1.)
x2,y2 = np.mgrid[extent[0][0]:extent[0][1]:dx, extent[1][0]:extent[1][1]:dy]
z = np.array(gkde.evaluate([x2.flatten(),y2.flatten()])).reshape(x2.shape)
W = (np.exp(-0.5 * np.array(sigmas) ** 2))*z.max()
W = np.append(W[::-1],z.max())
if datapoints:
ax.plot(x, y, "o", color=color, ms=1.5, zorder=-1, alpha=0.1, rasterized=True)
if fill:
ax.contourf(x2, y2, z, W, alpha=alpha, cmap=cmap, antialiased=True)
if contours:
ax.contour(x2, y2, z, W[:np.size(sigmas)-1],linewidths=linewidths, colors=color, antialiased=False)
data = np.vstack([x, y])
mu = np.mean(data, axis=1)
cov = np.cov(data)
if kwargs.pop("plot_ellipse", False):
error_ellipse(mu, cov, ax=ax, edgecolor="r", ls="dashed")
ax.set_xlim(extent[0])
ax.set_ylim(extent[1])
def plot_covariance( chain, par, **kwargs):
import matplotlib.pyplot as plt
vmin = kwargs.pop("vmin", -1)
vmax = kwargs.pop("vmax", 1)
data = [chain[p] for p in par]
plt.figure( figsize=(14,10))
#covmat = triu(corrcoef( chain[:,1:].T), k=1)
covmat = tril(corrcoef( chain[:,1:].T), k=-1)
#covmat = triu(corrcoef( chain.values()[1:]), k=1)
covmat = tril(corrcoef( data), k=-1)
covmat[covmat==0] = NaN
plt.imshow( covmat, vmin=vmin, vmax=vmax, **kwargs)
plt.yticks( range(len(covmat)), [parname.setdefault(p,p) for p in par[1:]])
plt.yticks( range(len(covmat)), [parname.setdefault(p,p) for p in par])
plt.xticks( range(len(covmat)), '')
plt.colorbar()
for i in range( len(par[1:-1])):
for i in range( len(par[:-1])):
# text( i, i+0.25, parname.setdefault(par[i+1],par[i+1]),rotation=-90)
plt.text( i-0.25, i+0.25, parname.setdefault(par[i+1],par[i+1]),rotation='vertical',verticalalignment='bottom')
plt.text( i-0.25, i+0.25, parname.setdefault(par[i],par[i]),rotation='vertical',verticalalignment='bottom')
def ctr_level(histogram2d, lvl, infinite=False):
"""
Extract the contours for the 2d plots (Karim Benabed)
"""
import numpy as np
hist = histogram2d.flatten()*1.
hist.sort()
cum_hist = np.cumsum(hist[::-1])
cum_hist /= cum_hist[-1]
alvl = np.searchsorted(cum_hist, lvl)
clist = hist[-alvl]
return clist
########################################################################
########################################################################
def extract_bestfit( dirname):
f = open( dirname, 'r')
param = array( f.readline().replace('\n','').split('\t'), dtype=str)
value = array( f.readline().replace('\t',' ').split(' '), dtype=double)
f.close()
return( param,value)
# bestfit = dict(zip(param, value))
# return( bestfit)
def extract_hess( filename, ext="MnUserCovariance", reverse=False):
f = open( filename, 'r')
lines=f.readlines()
f.close()
npar = 0
par = []
for l in lines:
if l.startswith('par\t'):
npar = npar + 1
par.append( l.split("\t")[1])
print( "number of parameter : %d/%d" % (npar,len(par)))
if reverse:
lines.reverse()
index = 0
while not lines[index].startswith(ext):
index = index+1
print(index)
while lines[index] == "\n" or lines[index] == "":
index = index+1
cov=[]
for n in range(npar):