Commit 3ccdd452 authored by Plaszczynski Stephane's avatar Plaszczynski Stephane
Browse files

Merge branch 'master' of gitlab.in2p3.fr:plaszczy/CAMEL

parents 91e011aa c0478ca8
#!/bin/bash
#EXPECTED_ARGS=5
#parFile=$1
#var=$2
#xmin=$3
#xmax=$4
#N=$5
#if [ $# -ne $EXPECTED_ARGS ]
#then
# echo "Usage: `basename $0` parfile var xmin xmax N"
# exit 1
#fi
#val array
var="100*theta_s"
xmin=1.04
xmax=1.0422
N=50
EXPECTED_ARGS=2
if [ $# -ne $EXPECTED_ARGS ]
then
echo ">>>> Usage: `basename $0` parfile range"
echo "range should be specified as an interval in the form: I-J (as \"1-3\")"
echo "which corresponds to each array-job interval (ie. starting points)"
echo "parfile values are not shuffled (randomly) for job 1"
exit 1
fi
parFile=$1
range=$2
#check environment
if [ -z "${CAMELROOT}" ] ; then
echo "CAMELROOT undefined: did you sourced camel_setup.sh?"
exit
fi
if [ ! -f "$parFile" ] ; then
echo "Missing parfile=$parFile"
exit
fi
##############define the scanned variable###############
# define the variable/values that is scanned
var="log(10^10A_s)"
#values : methods 1 specify number of points N and boudaries [xmin,xmax]
N=50
xmin=2.9
xmax=3.2
#compute
for ((i=0;i<N;i++)) ; do
val[$i]=$(echo | awk -v xmin=$xmin -v xmax=$xmax -v N=$N -v i=$i 'BEGIN{step=(xmax-xmin)/(N-1)} {printf("%f",xmin+(i)*step)}')
done
#val=(0.9 1 1.05 1.10 1.15 1.20 1.25 1.30 1.35 1.4)
#val=(0.95 1.03 1.07 1.12 1.17)
#val=(0.95 1 1.03 1.05 1.07 1.10 1.12 1.15 1.20 1.25 1.30)
# method 2. add explcitely the list of values
#var="Alens"
#val=(0.95 1 1.03 1.05 1.07 1.10 1.12 1.15 1.20 1.25 1.30)
#or
#var=tau_reio
#val=(0.05 0.06 0.07 0.08 0.09 0.10 0.11 0.12 0.13 0.14 0.15 0.15 0.16 0.17 0.18 0.19 0.20 0.21 0.22 0.23 0.24 0.25)
#val=(0.025 0.035 0.045 0.055 0.065 0.075 0.085 0.095)
#val=(0.01 0.015 0.02 0.025 0.03 0.035 0.04 0.045 0.05 0.055 0.06 0.065 0.07 0.075 0.08 0.085 0.09 0.095 0.10 0.11 0.12)
#val=(0.16 0.17 0.18 0.10 0.20 0.21 0.22 0.23 0.24 0.25)
######################################################
echo "Scannning variable $var"
echo "values=${val[*]}"
#var=N_eff
#TE
#val=(1.5 1.6 1.7 1.8 1.9 2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. 3.2 3.5)
#val=(0.7 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. 3.2 3.5 )
#fix number of cores for openMP
NCORE=8
#default QSUB command (if not overriden by user)
if [ -z "${QSUB_CMD}" ] ; then
QSUB_CMD="qsub -P P_$GROUP -pe multicores $NCORE -q mc_long -R y -j y -l sps=1"
fi
echo "QSUB command will be: ${QSUB_CMD} -t $range"
#EE
#val=(0.1 0.5 1. 1.5 2 2.5 3 3.5 4 4.5 5)
#;val=(0.2 0.3 0.4 0.6 0.7 0.8 0.9 1.1 1.2 1.3 1.4 1.6 1.7 1.8 1.9 2.1 2.2 2.3 2.4 2.6 2.7 2.8 2.9 3.1 3.2 3.3 3.4 3.6 3.7)
echo "Is this OK? [y/n]"
read answer
if [ $answer != 'y' ] ; then
echo "exiting"
exit 1
fi
#TT
#val=(0.5 0.7 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. 3.2 3.4 )
#val=(3.5 3.6 3.7 3.8 3.9 4)
######################################
cd `dirname $1`
dirpar=$PWD
cd -
parbase=`basename $1`
file=$dirpar/$parbase
#escape problematic characters
varesc=$(printf "%q" "$var")
dirout=${parbase%".par"}_mprof_"${var}"
file=$(readlink -e $parFile)
parbase=$(basename $file)
dirout=${parbase%".par"}_mprof_"$var"
if [ -d "$dirout" ] ; then
echo "directory $dirout exists: still want to run? (o/n)"
echo "directory $dirout exists: still want to run? (y/n)"
read answer
if [ $answer != 'o' ] ; then
if [ $answer != 'y' ] ; then
echo "exiting"
exit 1
fi
......@@ -68,37 +81,19 @@ fi
mkdir $dirout
if [ -z "${CAMELROOT}" ] ; then
echo "CAMELROOT undefined"
exit
fi
if [ -z "$CLIKDIR" ] ; then
echo "miss CLIKDIR"
exit
fi
if [ ! -f "$file" ] ; then
echo "mssing $file"
exit
fi
CAMELDIR=${GROUP_DIR}/camel
HERE=$PWD
##########LOOP################################
for i in $(seq 0 $((${#val[@]}-1))) ; do
zeval=${val[$i]}
echo $zeval
OUTDIR=$HERE/$dirout/"$var"_${zeval}
mkdir $OUTDIR
cd $OUTDIR
###################################################
########################################################
cat > camelrun_$zeval <<EOBATCH
#!/bin/bash
#$ -N $(basename $(dirname $PWD))_$zeval
#!/bin/env bash
#$ -N ${parbase}_$zeval
#$ -R y
#$ -j y
#$ -o $PWD
......@@ -109,19 +104,19 @@ cd \$TMPDIR
uname -a
echo "NSLOTS=\$NSLOTS"
export OMP_NUM_THREADS=8
export OMP_NUM_THREADS=$NCORE
export PYTHONPATH=${PICO_CODE}
# copies localeS
cp $CAMELROOT/$CMTCONFIG/Minimize .
cp $CAMELROOT/batch/awk/genrand.awk .
cp $CAMELROOT/work/tools/awk/genrand.awk .
#
#input file -> transforme en pico et enleve var
grep -v "$var" $file > parfile_in
grep -v "$varesc" $file > parfile_in
#compute var
#compute var: here no escape
echo "fix $var cosmo $zeval" >> parfile_in
echo "init par:"
......@@ -150,10 +145,9 @@ cp -f best_fit "$OUTDIR"/best_fit\${SGE_TASK_ID}
cp -f covmat "$OUTDIR"/covmat\${SGE_TASK_ID}
EOBATCH
#####################################################################################################################################
#SP only
qsub -P P_planck_prod -t 1-3 -pe openmpi_8 8 -q pa_long camelrun_$zeval
qsub -P P_planck_prod -t 4-6 -pe multicores 8 -q mc_long camelrun_$zeval
#qsub -P P_planck_prod -t 1-10 -pe multicores 8 -q mc_long camelrun_$zeval
#####################################################################################################################################
###################################################################
#submit an array job for this scanned value
jobsub="${QSUB_CMD} -t \"$range\" camelrun_$zeval"
eval $jobsub
##################################################################
done
......@@ -15,63 +15,26 @@ def cov2cor( mat):
return(cor)
def readCosmoMC( filename, nchain=4):
cosmopars = list(loadtxt( "%s.paramnames" % (filename), dtype='str', unpack=True, usecols=[0]))
def extract_bestfit( dirname):
parlist = ['iter','chi2']+[from_cosmomcName.setdefault(p,p) for p in cosmopars]
f = open( dirname, 'r')
param = array( f.readline().replace('\n','').split('\t'), dtype=str)
value = array( f.readline().replace('\t',' ').split(' '), dtype=double)
f.close()
return( param,value)
# bestfit = dict(zip(param, value))
# return( bestfit)
data = extract_chains( filename+"_", nchain)
chain = {}
for p in range(len(parlist)):
chain[parlist[p]] = concatenate( [tmp[p,:] for tmp in data])
def extract_hess( filename, ext="MnUserCovariance", reverse=False):
f = open( filename, 'r')
lines=f.readlines()
f.close()
return( chain)
npar = 0
par = []
for l in lines:
if l.startswith('par\t'):
npar = npar + 1
par.append( l.split("\t")[1])
print( "number of parameter : %d/%d" % (npar,len(par)))
def mergeMC( filename, burnin=0.9, num=(1,2,3,4), ext="txt", par=None):
if reverse:
lines.reverse()
index = 0
while not lines[index].startswith(ext):
index = index+1
print(index)
while lines[index] == "\n" or lines[index] == "":
index = index+1
if par == None:
f = open( "%s%d.%s" % (filename,num[0],ext))
par = [v for v in f.readline().replace("\n","").split("\t") if v != ""]
cov=[]
for n in range(npar):
cov.append([v for v in lines[index+2+n].replace("\n","").split(" ") if v != ""])
n = len(cov[0])
mat = zeros( (n,n))
for i in range(n):
mat[i,:] = array(cov[i])
if reverse:
mat = flipud(mat)
return(mat,par)
def mergeMC( filename, burnin=0.9, num=(1,2,3,4), ext="txt"):
for n in range(0,len(num)):
tmp = loadtxt( "%s%d.%s" % (filename,num[n],ext), skiprows=1)
......@@ -85,10 +48,11 @@ def mergeMC( filename, burnin=0.9, num=(1,2,3,4), ext="txt"):
else:
chain = concatenate((chain,tmp[ist:,:]),axis=0)
f = open( "%s%d.%s" % (filename,num[0],ext))
par = [v for v in f.readline().replace("\n","").split("\t") if v != ""]
return(chain,par)
dchain = {}
for i in range(len(par)):
dchain[par[i]] = chain[:,i]
return(dchain)
def scan(filename):
......@@ -140,6 +104,7 @@ def extract_chains( chainame, nchain, begin=0, end=-1):
imin = begin
imax = end
chain = []
for c in range(0,nchain):
print( "%s%d.txt" % (chainame,c+1))
tmp = loadtxt( "%s%d.txt" % (chainame,c+1), skiprows=1)
......@@ -149,129 +114,89 @@ def extract_chains( chainame, nchain, begin=0, end=-1):
if imax < 0:
imax = nsamples
nele = (imax-imin)
if c == 0:
chain = zeros( (nchain, npar, nele))
chain[c,:,:] = tmp[imin:imax,:].T
f = open( "%s%d.txt" % (chainame,1))
par = [v for v in f.readline().replace("\n","").split("\t") if v != ""]
chain.append( tmp[imin:imax,:].T)
return(chain,par)
return(chain)
#GR test
def GR( chainame, gap=10000, length_max=-1):
nchain=4
length_min = 50
chain = extract_chains( chainame, nchain)
bla,npar,nsamples = shape(chain)
if length_max == -1:
length_max = nsamples
nele = (length_max-length_min)/gap+1
index_event = 0
R = zeros( (nele, npar))
for index in range(length_min,length_max,gap):
burnin = index/2
mchain = mean( chain[:,:,burnin:index],2)
schain = var( chain[:,:,burnin:index],2)
B_n = var( mchain,0)
W = mean( schain,0)
R[index_event,:] = ( (index-1.)/index*W + B_n*(1.+1./nchain) )/W
index_event = index_event + 1
return(R)
def hist2d(x, y, *args, **kwargs):
"""
Plot a 2-D histogram of samples.
"""
import scipy.stats
import numpy as np
import matplotlib.pyplot as pl
import matplotlib.cm as cm
ax = kwargs.pop("ax", pl.gca())
########################################################################
#CONVERGENCE
########################################################################
extent = kwargs.pop("extent", [[x.min(), x.max()], [y.min(), y.max()]])
bins = kwargs.pop("bins", 50)
color = kwargs.pop("color", "k")
alpha = kwargs.pop("alpha", 0.8)
linewidths = kwargs.pop("linewidths", None)
fill = kwargs.get("fill", True)
datapoints = kwargs.get("datapoints", False)
contours = kwargs.get("contours", True)
cmap = cm.get_cmap(kwargs.get("cmap", "gray"))
sigmas = kwargs.get("sigmas", [1,2,3])
#GR test
def GelmanRubin( chains, gap=10000, length_max=None):
length_min = 1000
X = np.linspace(extent[0][0], extent[0][1], bins + 1)
Y = np.linspace(extent[1][0], extent[1][1], bins + 1)
try:
H, X, Y = np.histogram2d(x.flatten(), y.flatten(), bins=(X, Y),
weights=kwargs.get('weights', None))
except ValueError:
raise ValueError("It looks like at least one of your sample columns "
"have no dynamic range. You could try using the "
"`extent` argument.")
gkde = scipy.stats.gaussian_kde([x, y])
dx = (extent[0][1]-extent[0][0])/(bins+1.)
dy = (extent[1][1]-extent[1][0])/(bins+1.)
# chains = extract_chains( chainame, nchain)
nchain = len(chains)
x2,y2 = np.mgrid[extent[0][0]:extent[0][1]:dx, extent[1][0]:extent[1][1]:dy]
z = np.array(gkde.evaluate([x2.flatten(),y2.flatten()])).reshape(x2.shape)
nsamples = min( [len(c[0,:]) for c in chains])
if length_max==None:
length_max = nsamples
W = (np.exp(-0.5 * np.array(sigmas) ** 2))*z.max()
W = np.append(W[::-1],z.max())
if datapoints:
ax.plot(x, y, "o", color=color, ms=1.5, zorder=-1, alpha=0.1, rasterized=True)
if length_max > nsamples:
print( "Not enough samples...")
exit()
if fill:
ax.contourf(x2, y2, z, W, alpha=alpha, cmap=cmap, antialiased=True)
if contours:
ax.contour(x2, y2, z, W[:np.size(sigmas)-1],linewidths=linewidths, colors=color, antialiased=False)
data = np.vstack([x, y])
mu = np.mean(data, axis=1)
cov = np.cov(data)
if kwargs.pop("plot_ellipse", False):
error_ellipse(mu, cov, ax=ax, edgecolor="r", ls="dashed")
ax.set_xlim(extent[0])
ax.set_ylim(extent[1])
it = range(length_min,length_max,gap)
R = []
for index in it:
# n = index - length_min
# tmp = [c[:,length_min+n/2:index] for c in chains]
n = length_max - index
tmp = [c[:,index+n/2:length_max] for c in chains]
#within Chain
mchain = mean( tmp,2)
schain = var( tmp,2)
#between chains
W = mean( schain,0)
B = var( mchain,0)
#sqrt ?
R.append( ( (index-1.)/index*W + B*(1.+1./nchain) )/W )
return(it,R)
def ctr_level(histogram2d, lvl, infinite=False):
"""
Extract the contours for the 2d plots (Karim Benabed)
"""
import numpy as np
def Geweke( chains, gap=10000, length_max=None):
length_min = 1000
nchain = len(chains)
nsamples = min( [len(c[0,:]) for c in chains])
if length_max==None:
length_max = nsamples
hist = histogram2d.flatten()*1.
hist.sort()
cum_hist = np.cumsum(hist[::-1])
cum_hist /= cum_hist[-1]
if length_max > nsamples:
print( "Not enough samples...")
exit()
it = range(length_min,length_max,gap)
T = []
for index in it:
n = length_max-index
tmp1 = [c[:,index:index+n*0.1] for c in chains]
tmp2 = [c[:,index+n*0.5:length_max] for c in chains]
#Tscore
Z1 = mean( tmp1,2)
Z2 = mean( tmp2,2)
s = sqrt( var(tmp1,2)/(n*0.1) + var(tmp2,2)/(n*0.5) )
T.append( (Z1-Z2)/s)
return(it,T)
########################################################################
alvl = np.searchsorted(cum_hist, lvl)
clist = hist[-alvl]
return clist
########################################################################
#PARAMETER NAMES
########################################################################
parname = {
'omega_b' : '$\\Omega_\mathrm{b}$',
......@@ -343,7 +268,7 @@ parname = {
}
cosmomcName = {
to_cosmomcName = {
'omega_b' : 'omegabh2',
'omega_cdm' : 'omegach2',
'100*theta_s' : 'theta',
......@@ -380,80 +305,93 @@ cosmomcName = {
'Aps143x217' : 'Aps143x217',
}
from_cosmomcName = dict(zip(to_cosmomcName.values(),to_cosmomcName.keys()))
########################################################################
def triangle( chains, nbin=50, params='', name='', npar=6, color=''):
#PLOTS
########################################################################
def triangle( chains, params, nbin=50, parnames=None, name='', npar=6, color=''):
import matplotlib.pyplot as plt
import scipy.ndimage as nd
import matplotlib.ticker as mtick
fig=plt.figure(figsize=(14,12))
plt.subplots_adjust(hspace=0.001,wspace=0.001)
tmp = concatenate( chains)
nchain = len(chains)
if len(color) < nchain:
ax = plt.gca()
color_cycle = ax._get_lines.color_cycle
color = [next(color_cycle) for i in range(nchain)]
if parnames == None:
parnames = [parname.setdefault(p,p) for p in params]
npar = len(params)
npar = min( [npar,len(params)-1])
rge = {}
for par in params:
tmp = []
for chain in chains:
tmp = tmp+list(chain[par])
rge[par] = list(mean(tmp) + dot(std(tmp), [-4,4]))
for xpar in range(1,npar+1):
ax=plt.subplot( npar, npar, (xpar-1)*npar+xpar)
xrge = list(mean(tmp[:,xpar]) + dot(std(tmp[:,xpar]), [-4,4]))
for par in params:
xpar = params.index(par)
ax=plt.subplot( npar, npar, xpar*npar+xpar+1)
for c in range(nchain):
chain = chains[c]
h,X=histogram( chain[:,xpar], bins=nbin, range=xrge, normed=True)
h,X=histogram( chain[par], bins=nbin, range=rge[par], normed=True)
plt.plot( (X[1:]+X[:-1])/2., nd.gaussian_filter( h, 1), color=color[c])
ax.set_xlim( xrge)
ax.set_xlim( rge[par])
ax.yaxis.set_visible(False)
ax.xaxis.set_visible(xpar==npar)
ax.xaxis.set_visible(xpar==(npar-1))
#ax.locator_params( nbins=npar-1)
ax.tick_params(axis='both', which='major', labelsize=6)
ax.tick_params(axis='both', which='minor', labelsize=5)
if 'PS' in parname.setdefault(params[xpar],params[xpar]):
if 'PS' in parname.setdefault(parnames[xpar],parnames[xpar]):
ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.0e'))
#ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e'))
ax.locator_params(nbins=4)
ax.locator_params(tight=True, nbins=4)
for ypar in range(xpar-1,0,-1):
ax=plt.subplot( npar, npar, (xpar-1)*npar+ypar)
for ypar in range(xpar)[::-1]:
par2 = params[ypar]
ax=plt.subplot( npar, npar, xpar*npar+ypar+1)
yrge = list(mean(tmp[:,ypar]) + dot(std(tmp[:,ypar]), [-4,4]))
for c in range(nchain):
chain = chains[c]
hmap,binX,binY = histogram2d( chain[:,xpar], chain[:,ypar], bins=nbin, range=[xrge,yrge])
hmap,binX,binY = histogram2d( chain[par], chain[par2], bins=nbin, range=[rge[par],rge[par2]])
Y,X = meshgrid((binX[1:]+binX[:-1])/2.,(binY[1:]+binY[:-1])/2.)
plt.contour( X,Y, nd.gaussian_filter(hmap.T,2), levels=ctr_level( hmap, [0.68,0.95]), colors=color[c], extent=tuple(xrge+yrge))
plt.contour( X,Y, nd.gaussian_filter(hmap.T,2), levels=ctr_level( hmap, [0.68,0.95]), colors=color[c], extent=tuple(rge[par]+rge[par2]))
ax.locator_params( nbins=npar-1)
ax.ticklabel_format(useOffset=False)
ax.tick_params(axis='both', which='major', labelsize=6)
ax.tick_params(axis='both', which='minor', labelsize=5)
ax.yaxis.set_visible(ypar==1)
ax.xaxis.set_visible(xpar==npar)
ax.locator_params(nbins=4)
if 'PS' in parname.setdefault(params[ypar],params[ypar]):
ax.yaxis.set_visible(ypar==0)
ax.xaxis.set_visible(xpar==(npar-1))
ax.locator_params(tight=True, nbins=4)